Skip to content

Commit 05dbeec

Browse files
authored
Author page for Jing Huang of Stanford (closes #3506) (#5823)
1 parent 94db3bc commit 05dbeec

23 files changed

+136
-74
lines changed

bin/add_author_id.py

Lines changed: 103 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,138 @@
11
#!/usr/bin/env python3
2-
# -*- coding: utf-8 -*-
3-
#
4-
# Copyright 2022 Matt Post <[email protected]>
5-
#
6-
# Licensed under the Apache License, Version 2.0 (the "License");
7-
# you may not use this file except in compliance with the License.
8-
# You may obtain a copy of the License at
9-
#
10-
# http://www.apache.org/licenses/LICENSE-2.0
11-
#
12-
# Unless required by applicable law or agreed to in writing, software
13-
# distributed under the License is distributed on an "AS IS" BASIS,
14-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15-
# See the License for the specific language governing permissions and
16-
# limitations under the License.
2+
# -*- coding: utf-8 -*-
3+
"""Add an author ID to NameSpecification entries using the acl_anthology module.
174
18-
"""
19-
Adds an ID tag to all instances of an author in all XML files where there is no ID tag.
20-
21-
First use case was the Bill Byrne separation of July 2022.
22-
23-
2020.gebnlp-1.4 E14-1026 E14-1028 W16-2324 2021.acl-long.55 2021.eancs-1.2 W15-0116 D19-1125 D19-1331 D19-1459 P14-3000 2022.naacl-main.136 W18-1821 W18-5420 W18-6427 2020.nlp4call-1.2 N19-1406 2021.emnlp-main.620 2021.emnlp-main.666 N18-2081 N18-3013 W17-3531 2020.wmt-1.94 D15-1273 2022.nlp4convai-1.7 P16-2049 C14-1195 P19-1022 W19-4417 W19-4424 W19-5340 W19-5421 2020.wat-1.21 E17-2058 2022.ecnlp-1.13 J14-3008 N15-1041 N15-1105 P18-2051 D17-1208 D17-1220 D17-2005 2020.acl-main.690 2020.acl-main.693 N16-1100 2022.findings-acl.223 2022.findings-acl.301
5+
This script adds the name ID to all papers matching the first and last name.
6+
It will use the module to find the list of papers to edit. Alternately, you
7+
provide it with the list of papers.
248
259
Usage:
26-
27-
./add_author_id.py bill-byrne --last-name Byrne --first-name Bill
10+
./add_author_id.py <id> "Last name[, First name]" [--paper-ids 2022.acl-main.74 ...]
2811
"""
2912

30-
import argparse
31-
import os
13+
from __future__ import annotations
3214

33-
from pathlib import Path
34-
from anthology.utils import indent
15+
import argparse
16+
from collections import defaultdict
3517
from itertools import chain
18+
from pathlib import Path
19+
20+
from acl_anthology.anthology import Anthology
3621

22+
# old library since we're still editing XML files
23+
from anthology.utils import indent
3724
import lxml.etree as ET
3825

3926

40-
def main(args: argparse.Namespace) -> None:
    """Add the author ID ``args.id`` to matching <author>/<editor> XML entries.

    The set of papers to edit comes either from an explicit ``--paper-ids``
    list or from looking up ``args.name`` through the acl_anthology module.
    Each affected XML file under ``<data-dir>/xml/`` is rewritten in place.
    """
    # args.name is "Last, First" or a bare "Last" (first name then stays None,
    # which also matches authors with no <first> element at all).
    last_name, first_name = (
        args.name.split(", ") if ", " in args.name else (args.name, None)
    )

    anthology = Anthology(args.data_dir, verbose=True)

    # Map collection ID -> list of paper ID tuples to modify in that XML file
    collection_to_paper_map = defaultdict(list)

    if args.paper_ids:
        for paper_id in args.paper_ids:
            paper = anthology.get_paper(paper_id)
            if paper:
                collection_to_paper_map[paper.collection_id].append(paper.full_id_tuple)
    else:
        people = anthology.find_people(args.name)
        if not people:
            # BUGFIX: previously this only printed and fell through, crashing
            # later on an unbound loop variable; bail out instead.
            print(f"No person found matching name {args.name}")
            return

        # Find the person whose ID is still implicit (not yet pinned);
        # the for/else runs the else only when no such person exists.
        for person in people:
            if not person.is_explicit:
                break
        else:
            # BUGFIX: the old `if not person` check could never fire, since
            # `person` stays bound to the last element after the loop.
            print(f"All people matching name {args.name} already have explicit IDs")
            return

        for paper in person.papers():
            collection_to_paper_map[paper.collection_id].append(paper.full_id_tuple)

    if collection_to_paper_map:
        print("Will edit the following paper IDs:")
        for paper_id_tuples in collection_to_paper_map.values():
            for paper_id in paper_id_tuples:
                print(f" - {paper_id}")

    # Now iterate over those files and the papers within them
    for collection_id, paper_id_tuples in collection_to_paper_map.items():
        xml_file = Path(args.data_dir) / "xml" / f"{collection_id}.xml"

        tree = ET.parse(xml_file)

        for paper_tuple in paper_id_tuples:
            _, volume_id, paper_id = paper_tuple

            # Locate the paper's XML node within its volume
            paper_xml = tree.getroot().find(
                f"./volume[@id='{volume_id}']/paper[@id='{paper_id}']"
            )
            if paper_xml is None:
                # BUGFIX: guard against a missing/stale paper entry instead of
                # crashing with AttributeError on findall() below.
                print(f"Could not find paper {paper_tuple} in {xml_file}")
                continue

            for author_xml in chain(
                paper_xml.findall("./author"), paper_xml.findall("./editor")
            ):
                if "id" in author_xml.attrib:
                    continue  # already disambiguated
                try:
                    author_first_name = author_xml.find("./first").text
                except AttributeError:
                    author_first_name = None  # author has no <first> element
                author_last_name = author_xml.find("./last").text

                if author_last_name == last_name and author_first_name == first_name:
                    # BUGFIX: removed the dead assignment that compared
                    # paper_xml.text (instead of .tag) against "paper" — its
                    # result was immediately overwritten by the lookup below.
                    full_id = anthology.get_paper(paper_tuple).full_id
                    print(
                        f"Adding {args.id} to {author_first_name} {author_last_name} on paper {full_id}..."
                    )
                    author_xml.attrib["id"] = args.id

        indent(tree.getroot())
        tree.write(xml_file, encoding="UTF-8", xml_declaration=True)

    # TODO: once the acl_anthology module can write XML, replace the manual
    # editing above with `author.id = args.id` on person.papers() followed by
    # anthology.save_all() (which doesn't currently work).
70120

71121

72122
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        # BUGFIX: this string was previously passed positionally, which sets
        # argparse's `prog` (the displayed program name), not the description.
        description="Add an author ID to all of an author's papers"
    )
    parser.add_argument("id", help="Author ID to add")
    parser.add_argument("name", help="Author's name (last[, first])")
    parser.add_argument("--paper-ids", nargs="*", help="List of paper IDs to modify")
    parser.add_argument(
        "--data-dir",
        default=None,
        help="Path to anthology data directory (default: ../data relative to repository root)",
    )
    args = parser.parse_args()
    # Normalize data_dir to a path string usable by Anthology: trust a
    # user-supplied path; otherwise compute ../data relative to this script.
    if args.data_dir is None:
        args.data_dir = str(Path(__file__).parent.parent / "data")
    main(args)

data/xml/2020.acl.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3245,7 +3245,7 @@
32453245
<paper id="241">
32463246
<title>Orthogonal Relation Transforms with Graph Context Modeling for Knowledge Graph Embedding</title>
32473247
<author><first>Yun</first><last>Tang</last></author>
3248-
<author><first>Jing</first><last>Huang</last></author>
3248+
<author id="jing-huang"><first>Jing</first><last>Huang</last></author>
32493249
<author><first>Guangtao</first><last>Wang</last></author>
32503250
<author><first>Xiaodong</first><last>He</last></author>
32513251
<author><first>Bowen</first><last>Zhou</last></author>

data/xml/2021.naacl.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2814,7 +2814,7 @@
28142814
<author><first>Kevin</first><last>Huang</last></author>
28152815
<author><first>Tengyu</first><last>Ma</last></author>
28162816
<author><first>Quanquan</first><last>Gu</last></author>
2817-
<author><first>Jing</first><last>Huang</last></author>
2817+
<author id="jing-huang"><first>Jing</first><last>Huang</last></author>
28182818
<pages>2609–2615</pages>
28192819
<abstract>First-order meta-learning algorithms have been widely used in practice to learn initial model parameters that can be quickly adapted to new tasks due to their efficiency and effectiveness. However, existing studies find that meta-learner can overfit to some specific adaptation when we have heterogeneous tasks, leading to significantly degraded performance. In Natural Language Processing (NLP) applications, datasets are often diverse and each task has its unique characteristics. Therefore, to address the overfitting issue when applying first-order meta-learning to NLP applications, we propose to reduce the variance of the gradient estimator used in task adaptation. To this end, we develop a variance-reduced first-order meta-learning algorithm. The core of our algorithm is to introduce a novel variance reduction term to the gradient estimation when performing the task adaptation. Experiments on two NLP applications: few-shot text classification and multi-domain dialog state tracking demonstrate the superior performance of our proposed method.</abstract>
28202820
<url hash="93468fde">2021.naacl-main.206</url>
@@ -3111,7 +3111,7 @@
31113111
<author><first>Peng</first><last>Qi</last></author>
31123112
<author><first>Guangtao</first><last>Wang</last></author>
31133113
<author><first>Rex</first><last>Ying</last></author>
3114-
<author><first>Jing</first><last>Huang</last></author>
3114+
<author id="jing-huang"><first>Jing</first><last>Huang</last></author>
31153115
<author><first>Xiaodong</first><last>He</last></author>
31163116
<author><first>Bowen</first><last>Zhou</last></author>
31173117
<pages>2884–2894</pages>

data/xml/2021.repl4nlp.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@
377377
<author><first>Peng</first><last>Qi</last></author>
378378
<author><first>Guangtao</first><last>Wang</last></author>
379379
<author><first>Tengyu</first><last>Ma</last></author>
380-
<author><first>Jing</first><last>Huang</last></author>
380+
<author id="jing-huang"><first>Jing</first><last>Huang</last></author>
381381
<pages>307–315</pages>
382382
<abstract>Document-level relation extraction is a challenging task, requiring reasoning over multiple sentences to predict a set of relations in a document. In this paper, we propose a novel framework E2GRE (Entity and Evidence Guided Relation Extraction) that jointly extracts relations and the underlying evidence sentences by using large pretrained language model (LM) as input encoder. First, we propose to guide the pretrained LM’s attention mechanism to focus on relevant context by using attention probabilities as additional features for evidence prediction. Furthermore, instead of feeding the whole document into pretrained LMs to obtain entity representation, we concatenate document text with head entities to help LMs concentrate on parts of the document that are more related to the head entity. Our E2GRE jointly learns relation extraction and evidence prediction effectively, showing large gains on both these tasks, which we find are highly correlated.</abstract>
383383
<url hash="d034db75">2021.repl4nlp-1.30</url>

data/xml/2021.sustainlp.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@
143143
<author><first>Xiaochen</first><last>Hou</last></author>
144144
<author><first>Diyi</first><last>Yang</last></author>
145145
<author><first>Kathleen</first><last>McKeown</last></author>
146-
<author><first>Jing</first><last>Huang</last></author>
146+
<author id="jing-huang"><first>Jing</first><last>Huang</last></author>
147147
<pages>79–85</pages>
148148
<abstract>Large pre-trained language models (PLMs) have led to great success on various commonsense question answering (QA) tasks in an end-to-end fashion. However, little attention has been paid to what commonsense knowledge is needed to deeply characterize these QA tasks. In this work, we proposed to categorize the semantics needed for these tasks using the SocialIQA as an example. Building upon our labeled social knowledge categories dataset on top of SocialIQA, we further train neural QA models to incorporate such social knowledge categories and relation information from a knowledge base. Unlike previous work, we observe our models with semantic categorizations of social knowledge can achieve comparable performance with a relatively simple model and smaller size compared to other complex approaches.</abstract>
149149
<url hash="499d3240">2021.sustainlp-1.10</url>

data/xml/2021.textgraphs.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@
107107
<paper id="8">
108108
<title>Selective Attention Based Graph Convolutional Networks for Aspect-Level Sentiment Classification</title>
109109
<author><first>Xiaochen</first><last>Hou</last></author>
110-
<author><first>Jing</first><last>Huang</last></author>
110+
<author id="jing-huang"><first>Jing</first><last>Huang</last></author>
111111
<author><first>Guangtao</first><last>Wang</last></author>
112112
<author><first>Peng</first><last>Qi</last></author>
113113
<author><first>Xiaodong</first><last>He</last></author>

data/xml/2022.acl.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7546,7 +7546,7 @@ in the Case of Unambiguous Gender</title>
75467546
<author><first>Chao</first><last>Shang</last></author>
75477547
<author><first>Guangtao</first><last>Wang</last></author>
75487548
<author><first>Peng</first><last>Qi</last></author>
7549-
<author><first>Jing</first><last>Huang</last></author>
7549+
<author id="jing-huang"><first>Jing</first><last>Huang</last></author>
75507550
<pages>8017-8026</pages>
75517551
<abstract>Question answering over temporal knowledge graphs (KGs) efficiently uses facts contained in a temporal KG, which records entity relations and when they occur in time, to answer natural language questions (e.g., “Who was the president of the US before Obama?”). These questions often involve three time-related challenges that previous work fail to adequately address: 1) questions often do not specify exact timestamps of interest (e.g., “Obama” instead of 2000); 2) subtle lexical differences in time relations (e.g., “before” vs “after”); 3) off-the-shelf temporal KG embeddings that previous work builds on ignore the temporal order of timestamps, which is crucial for answering temporal-order related questions. In this paper, we propose a time-sensitive question answering (TSQA) framework to tackle these problems. TSQA features a timestamp estimation module to infer the unwritten timestamp from the question. We also employ a time-sensitive KG encoder to inject ordering information into the temporal KG embeddings that TSQA is based on. With the help of techniques to reduce the search space for potential answers, TSQA significantly outperforms the previous state of the art on a new benchmark for question answering over temporal KGs, especially achieving a 32% (absolute) error reduction on complex questions that require multiple steps of reasoning over facts in the temporal KG.</abstract>
75527552
<url hash="2642c44d">2022.acl-long.552</url>

data/xml/2022.emnlp.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4236,7 +4236,7 @@
42364236
<author><first>Shereen</first><last>Oraby</last><affiliation>Amazon Alexa AI</affiliation></author>
42374237
<author><first>Alessandra</first><last>Cervone</last><affiliation>Amazon Alexa AI</affiliation></author>
42384238
<author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
4239-
<author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
4239+
<author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
42404240
<author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
42414241
<author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
42424242
<pages>4590-4605</pages>
@@ -4264,7 +4264,7 @@
42644264
<author><first>Shereen</first><last>Oraby</last><affiliation>Amazon Alexa AI</affiliation></author>
42654265
<author><first>Shuyang</first><last>Gao</last><affiliation>Amazon.com, Inc.</affiliation></author>
42664266
<author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
4267-
<author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
4267+
<author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
42684268
<author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
42694269
<author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
42704270
<pages>4635-4648</pages>

data/xml/2023.acl.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7341,7 +7341,7 @@
73417341
<author><first>Wenbo</first><last>Zhao</last><affiliation>Amazon</affiliation></author>
73427342
<author><first>Yiwen</first><last>Chen</last><affiliation>University of Cambridge</affiliation></author>
73437343
<author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
7344-
<author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
7344+
<author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
73457345
<author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
73467346
<pages>9235-9254</pages>
73477347
<abstract>Automatic melody-to-lyric generation is a task in which song lyrics are generated to go with a given melody. It is of significant practical interest and more challenging than unconstrained lyric generation as the music imposes additional constraints onto the lyrics. The training data is limited as most songs are copyrighted, resulting in models that underfit the complicated cross-modal relationship between melody and lyrics. In this work, we propose a method for generating high-quality lyrics without training on any aligned melody-lyric data. Specifically, we design a hierarchical lyric generation framework that first generates a song outline and second the complete lyrics. The framework enables disentanglement of training (based purely on text) from inference (melody-guided text generation) to circumvent the shortage of parallel data. We leverage the segmentation and rhythm alignment between melody and lyrics to compile the given melody into decoding constraints as guidance during inference. The two-step hierarchical design also enables content control via the lyric outline, a much-desired feature for democratizing collaborative song creation. Experimental results show that our model can generate high-quality lyrics that are more on-topic, singable, intelligible, and coherent than strong baselines, for example SongMASS, a SOTA model trained on a parallel dataset, with a 24% relative overall quality improvement based on human ratings. Our code is available at <url>https://github.com/amazon-science/unsupervised-melody-to-lyrics-generation</url>.</abstract>

data/xml/2023.blackboxnlp.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@
272272
</paper>
273273
<paper id="24">
274274
<title>Rigorously Assessing Natural Language Explanations of Neurons</title>
275-
<author><first>Jing</first><last>Huang</last></author>
275+
<author id="jing-huang-stanford"><first>Jing</first><last>Huang</last></author>
276276
<author><first>Atticus</first><last>Geiger</last></author>
277277
<author><first>Karel</first><last>D’Oosterlinck</last></author>
278278
<author><first>Zhengxuan</first><last>Wu</last></author>

0 commit comments

Comments
 (0)