Skip to content
This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit e3919d9

Browse files
author
Alon Eirew
committed
Some minor API changes
1 parent d510f81 commit e3919d9

File tree

5 files changed

+78
-82
lines changed

5 files changed

+78
-82
lines changed

examples/cross_doc_coref/cross_doc_coref_sieves.py

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,24 @@
2828
from nlp_architect.models.cross_doc_sieves import run_event_coref, run_entity_coref
2929

3030

31-
def run_example():
31+
def run_example(cdc_settings):
32+
event_mentions = Topics(LIBRARY_ROOT + '/datasets/ecb/ecb_all_event_mentions.json')
33+
34+
event_clusters = None
35+
if cdc_settings.event_config.run_evaluation:
36+
logger.info('Running event coreference resolution')
37+
event_clusters = run_event_coref(event_mentions, cdc_settings)
38+
39+
entity_mentions = Topics(LIBRARY_ROOT + '/datasets/ecb/ecb_all_entity_mentions.json')
40+
entity_clusters = None
41+
if cdc_settings.entity_config.run_evaluation:
42+
logger.info('Running entity coreference resolution')
43+
entity_clusters = run_entity_coref(entity_mentions, cdc_settings)
44+
45+
return event_clusters, entity_clusters
46+
47+
48+
def create_example_settings():
3249
event_config = EventConfig()
3350
event_config.sieves_order = [
3451
(SieveType.STRICT, RelationType.SAME_HEAD_LEMMA, 0.0),
@@ -37,11 +54,7 @@ def run_example():
3754
(SieveType.RELAX, RelationType.SAME_HEAD_LEMMA_RELAX, 0.5),
3855
]
3956

40-
event_config.gold_mentions = Topics(LIBRARY_ROOT
41-
+ '/datasets/ecb/ecb_all_event_mentions.json')
42-
4357
entity_config = EntityConfig()
44-
4558
entity_config.sieves_order = [
4659
(SieveType.STRICT, RelationType.SAME_HEAD_LEMMA, 0.0),
4760
(SieveType.VERY_RELAX, RelationType.WIKIPEDIA_REDIRECT_LINK, 0.1),
@@ -50,36 +63,18 @@ def run_example():
5063
(SieveType.VERY_RELAX, RelationType.REFERENT_DICT, 0.5)
5164
]
5265

53-
entity_config.gold_mentions = Topics(LIBRARY_ROOT
54-
+ '/datasets/ecb/ecb_all_entity_mentions.json')
55-
5666
# CDCResources holds default attribute values that might need to be changed
5767
# (this example uses the default values). Use it to configure attributes
5868
# such as resource file locations, the output directory, resource init methods and others.
5969
# Check the class to see whether any attributes need to change in your set-up.
6070
resource_location = CDCResources()
61-
resources = CDCSettings(resource_location, event_config, entity_config)
62-
63-
event_clusters = None
64-
if event_config.run_evaluation:
65-
logger.info('Running event coreference resolution')
66-
event_clusters = run_event_coref(resources)
67-
68-
entity_clusters = None
69-
if entity_config.run_evaluation:
70-
logger.info('Running entity coreference resolution')
71-
entity_clusters = run_entity_coref(resources)
72-
73-
print('-=Cross Document Coref Results=-')
74-
print_results(event_clusters, 'Event')
75-
print('################################')
76-
print_results(entity_clusters, 'Entity')
71+
return CDCSettings(resource_location, event_config, entity_config)
7772

7873

7974
def print_results(clusters: List[Clusters], type: str):
8075
print('-=' + type + ' Clusters=-')
8176
for topic_cluster in clusters:
82-
print('\n\tCluster Topic=' + topic_cluster.topic_id)
77+
print('\n\tTopic=' + topic_cluster.topic_id)
8378
for cluster in topic_cluster.clusters_list:
8479
cluster_mentions = list()
8580
for mention in cluster.mentions:
@@ -92,8 +87,18 @@ def print_results(clusters: List[Clusters], type: str):
9287
+ str(cluster_mentions))
9388

9489

90+
def run_cdc_pipeline():
91+
cdc_settings = create_example_settings()
92+
event_clusters, entity_clusters = run_example(cdc_settings)
93+
94+
print('-=Cross Document Coref Results=-')
95+
print_results(event_clusters, 'Event')
96+
print('################################')
97+
print_results(entity_clusters, 'Entity')
98+
99+
95100
if __name__ == '__main__':
96101
logging.basicConfig(level=logging.INFO)
97102
logger = logging.getLogger(__name__)
98103

99-
run_example()
104+
run_cdc_pipeline()

nlp_architect/models/cross_doc_coref/cdc_config.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def __init__(self):
2727

2828
self.__sieves_order = None
2929
self.__run_evaluation = False
30-
self.__gold_mentions = None
3130

3231
@property
3332
def sieves_order(self):
@@ -58,15 +57,6 @@ def run_evaluation(self):
5857
def run_evaluation(self, run_evaluation: bool):
5958
self.__run_evaluation = run_evaluation
6059

61-
@property
62-
def gold_mentions(self) -> Topics:
63-
"""Mentions file to run against"""
64-
return self.__gold_mentions
65-
66-
@gold_mentions.setter
67-
def gold_mentions(self, gold_mentions_topics: Topics):
68-
self.__gold_mentions = gold_mentions_topics
69-
7060

7161
class EventConfig(CDCConfig):
7262
def __init__(self):
@@ -92,9 +82,6 @@ def __init__(self):
9282
(SieveType.STRICT, RelationType.WORDNET_DERIVATIONALLY, 0.0)
9383
]
9484

95-
self.gold_mentions = Topics(LIBRARY_ROOT
96-
+ '/datasets/ecb/ecb_all_event_mentions.json')
97-
9885

9986
class EntityConfig(CDCConfig):
10087
def __init__(self):
@@ -119,6 +106,3 @@ def __init__(self):
119106
(SieveType.STRICT, RelationType.WORDNET_SAME_SYNSET_ENTITY, 0.0),
120107
(SieveType.VERY_RELAX, RelationType.REFERENT_DICT, 0.5)
121108
]
122-
123-
self.gold_mentions = (LIBRARY_ROOT
124-
+ '/datasets/ecb/ecb_all_entity_mentions.json')

nlp_architect/models/cross_doc_coref/system/cdc_settings.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,12 @@ def __init__(self, resources, event_coref_config, entity_coref_config):
4343
self.context2vec_model = None
4444
self.wordnet = None
4545
self.within_doc = None
46-
self.events_topics = None
47-
self.entity_topics = None
4846
self.event_config = event_coref_config
4947
self.entity_config = entity_coref_config
5048
self.cdc_resources = resources
5149

5250
self.load_modules()
5351

54-
if event_coref_config.run_evaluation:
55-
self.events_topics = event_coref_config.gold_mentions
56-
if entity_coref_config.run_evaluation:
57-
self.entity_topics = entity_coref_config.gold_mentions
58-
if not self.events_topics and not self.entity_topics:
59-
raise Exception('No entity or events Gold topics loaded!')
60-
6152
def load_modules(self):
6253
relations = set()
6354
for sieve in self.event_config.sieves_order:

nlp_architect/models/cross_doc_coref/system/sieves/run_sieve_system.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import time
1919

2020
from nlp_architect.common.cdc.cluster import Clusters
21+
from nlp_architect.common.cdc.topics import Topic
22+
from nlp_architect.models.cross_doc_coref.system.cdc_settings import CDCSettings
2123
from nlp_architect.models.cross_doc_coref.system.sieves.sieves import get_sieve
2224

2325
logger = logging.getLogger(__name__)
@@ -90,3 +92,12 @@ def __init__(self, topic, resources):
9092
super(RunSystemsEvent, self).__init__(topic)
9193
self.sieves = self.set_sieves_from_config(resources.event_config,
9294
resources.get_module_from_relation)
95+
96+
97+
def get_run_system(topic: Topic, resource: CDCSettings, eval_type: str):
98+
if eval_type.lower() == 'entity':
99+
return RunSystemsEntity(topic, resource)
100+
elif eval_type.lower() == 'event':
101+
return RunSystemsEvent(topic, resource)
102+
else:
103+
raise AttributeError(eval_type + ' Not supported!')

nlp_architect/models/cross_doc_sieves.py

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -18,67 +18,72 @@
1818
from typing import List
1919

2020
from nlp_architect.common.cdc.cluster import Clusters
21+
from nlp_architect.common.cdc.topics import Topics
2122
from nlp_architect.models.cross_doc_coref.system.cdc_settings import CDCSettings
2223
from nlp_architect.models.cross_doc_coref.system.cdc_utils import write_clusters_to_file, \
2324
write_event_coref_scorer_results, write_entity_coref_scorer_results
24-
from nlp_architect.models.cross_doc_coref.system.sieves.run_sieve_system import RunSystemsEvent, \
25-
RunSystemsEntity
25+
from nlp_architect.models.cross_doc_coref.system.sieves.run_sieve_system import get_run_system
2626
from nlp_architect.utils import io
2727

2828
logger = logging.getLogger(__name__)
2929

3030

31-
def run_event_coref(resources: CDCSettings) -> List[Clusters]:
31+
def run_event_coref(topics: Topics, resources: CDCSettings) -> List[Clusters]:
3232
"""
3333
Running Cross Document Coref on event mentions
3434
Args:
35+
topics : The Topics (with mentions) to evaluate
3536
resources: resources for running the evaluation
3637
3738
Returns:
3839
Clusters: List of clusters and mentions with predicted cross doc coref within each topic
3940
"""
40-
io.create_folder(resources.cdc_resources.eval_output_dir)
41-
event_clusters_list = list()
42-
for topic in resources.events_topics.topics_list:
43-
sieves_list_event = RunSystemsEvent(topic, resources)
44-
clusters = sieves_list_event.run_deterministic()
45-
clusters.set_coref_chain_to_mentions()
46-
event_clusters_list.append(clusters)
47-
with open(os.path.join(
48-
resources.cdc_resources.eval_output_dir, 'event_clusters.txt'), 'w') \
49-
as event_clusters_file:
50-
write_clusters_to_file(clusters, topic.topic_id, event_clusters_file)
5141

52-
logger.info('Write event coref results')
53-
write_event_coref_scorer_results(resources.events_topics.topics_list,
54-
resources.cdc_resources.eval_output_dir)
55-
return event_clusters_list
42+
return _run_coref(topics, resources, 'event')
43+
44+
45+
def run_entity_coref(topics: Topics, resources: CDCSettings) -> List[Clusters]:
46+
"""
47+
Running Cross Document Coref on Entity mentions
48+
Args:
49+
topics : The Topics (with mentions) to evaluate
50+
resources: (CDCSettings) resources for running the evaluation
51+
52+
Returns:
53+
Clusters: List of topics and mentions with predicted cross doc coref within each topic
54+
"""
55+
return _run_coref(topics, resources, 'entity')
5656

5757

58-
def run_entity_coref(resources: CDCSettings) -> List[Clusters]:
58+
def _run_coref(topics: Topics, resources: CDCSettings, eval_type: str) -> List[Clusters]:
5959
"""
6060
Running Cross Document Coref on Entity or Event mentions, as selected by eval_type
6161
Args:
6262
resources: (CDCSettings) resources for running the evaluation
63+
topics : The Topics (with mentions) to evaluate
6364
6465
Returns:
6566
Clusters: List of topics and mentions with predicted cross doc coref within each topic
6667
"""
6768
io.create_folder(resources.cdc_resources.eval_output_dir)
68-
entity_clusters_list = list()
69-
for topic in resources.entity_topics.topics_list:
70-
sieves_list_entity = RunSystemsEntity(topic, resources)
71-
clusters = sieves_list_entity.run_deterministic()
69+
clusters_list = list()
70+
for topic in topics.topics_list:
71+
sieves_list = get_run_system(topic, resources, eval_type)
72+
clusters = sieves_list.run_deterministic()
7273
clusters.set_coref_chain_to_mentions()
73-
entity_clusters_list.append(clusters)
74+
clusters_list.append(clusters)
7475

7576
with open(os.path.join(
76-
resources.cdc_resources.eval_output_dir, 'entity_clusters.txt'), 'w') \
77-
as entity_clusters_file:
78-
write_clusters_to_file(clusters, topic.topic_id, entity_clusters_file)
77+
resources.cdc_resources.eval_output_dir, eval_type+ '_clusters.txt'), 'w') \
78+
as clusters_file:
79+
write_clusters_to_file(clusters, topic.topic_id, clusters_file)
7980

80-
logger.info('Write entity coref results')
81-
write_entity_coref_scorer_results(resources.entity_topics.topics_list,
82-
resources.cdc_resources.eval_output_dir)
81+
logger.info('Write ' + eval_type + ' coref results')
82+
if eval_type.lower() == 'entity':
83+
write_entity_coref_scorer_results(topics.topics_list,
84+
resources.cdc_resources.eval_output_dir)
85+
else:
86+
write_event_coref_scorer_results(topics.topics_list,
87+
resources.cdc_resources.eval_output_dir)
8388

84-
return entity_clusters_list
89+
return clusters_list

0 commit comments

Comments
 (0)