Skip to content
This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit f6466ed

Browse files
authored
Merge pull request #333 from NervanaSystems/alon/master_local
2 parents 11187c1 + 42b1486 commit f6466ed

File tree

11 files changed

+57
-31
lines changed

11 files changed

+57
-31
lines changed

doc/source/cross_doc_coref.rst

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,6 @@ Requirements
6363
{
6464
"topic_id": "2_ecb", #Required (a topic is a set of multiple documents that share the same subject)
6565
"doc_id": "1_10.xml", #Required (the article or document id this mention belongs to)
66-
"mention_head": "Josh", #Optional
67-
"mention_head_lemma": "josh", #Optional
68-
"mention_head_pos": "NOUN", #Optional (part of speech)
69-
"mention_ner": null, #Optional (named entity recognition)
70-
"mention_type": "HUM", #Optional (for debugging)
7166
"sent_id": 0, #Optional (mention sentence number in document)
7267
"tokens_number": [ #Optional (the token number in sentence, will be required when using Within doc entities)
7368
13
@@ -77,11 +72,6 @@ Requirements
7772
{
7873
"topic_id": "2_ecb", #Required
7974
"doc_id": "1_11.xml",
80-
"mention_head": "Reid",
81-
"mention_head_lemma": "reid",
82-
"mention_head_pos": "PROPN",
83-
"mention_ner": "PERSON",
84-
"mention_type": "HUM",
8575
"sent_id": 0,
8676
"tokens_number": [
8777
3
@@ -165,8 +155,10 @@ Code Example
165155
166156
entity_config.gold_mentions_file = '<Replace with your entity mentions json file>'
167157
168-
# Set the location of your resources,
169-
# Use the default values if can
158+
# CDCResources holds default attribute values that might need to be changed
159+
# (this example uses the defaults). Use it to configure attributes such as
160+
# resource file locations, the output directory, resource init methods and others.
161+
# Check the class to see whether any attributes require changes in your set-up.
170162
resource_location = CDCResources()
171163
172164
# create a new cross doc resources, with all needed semantic relation models

doc/source/identifying_semantic_relation.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,8 @@ Each script requires a **mentions** file in JSON format as seen below. This file
253253
254254
[
255255
{ # Mention 1
256-
"mention_head": "rep", #Optional
257-
"mention_head_lemma": "rep", #Optional
258-
"tokens_str": "rep" #Required
256+
"tokens_str": "Intel", #Required
257+
"context": "Intel is the world's second largest and second highest valued semiconductor chip maker" #Optional (used in Elmo)
259258
},
260259
{ # Mention 2
261260
"tokens_str": "Tara Reid"

examples/cross_doc_coref/cross_doc_coref_sieves.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ def run_example():
5050
entity_config.gold_mentions_file = LIBRARY_ROOT + \
5151
'/datasets/ecb/ecb_all_entity_mentions.json'
5252

53+
# CDCResources holds default attribute values that might need to be changed
54+
# (this example uses the defaults). Use it to configure attributes such as
55+
# resource file locations, the output directory, resource init methods and others.
56+
# Check the class to see whether any attributes require changes in your set-up.
5357
resource_location = CDCResources()
5458
resources = CDCSettings(resource_location, event_config, entity_config)
5559

nlp_architect/data/cdc_resources/relations/referent_dict_relation_extraction.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# ******************************************************************************
1616

1717
import logging
18+
import os
1819
from typing import Dict, List, Set
1920

2021
from nlp_architect.common.cdc.mention_data import MentionDataLight
@@ -38,11 +39,15 @@ def __init__(self, method: OnlineOROfflineMethod, ref_dict: str = None):
3839
ref_dict (required): str Location of referent dictionary file to work with
3940
"""
4041
logger.info('Loading ReferentDict module')
41-
if method == OnlineOROfflineMethod.OFFLINE:
42-
self.ref_dict = load_json_file(ref_dict)
43-
elif method == OnlineOROfflineMethod.ONLINE:
44-
self.ref_dict = self.load_reference_dict(ref_dict)
45-
logger.info('ReferentDict module lead successfully')
42+
if ref_dict is not None and os.path.isfile(ref_dict):
43+
if method == OnlineOROfflineMethod.OFFLINE:
44+
self.ref_dict = load_json_file(ref_dict)
45+
elif method == OnlineOROfflineMethod.ONLINE:
46+
self.ref_dict = self.load_reference_dict(ref_dict)
47+
logger.info('ReferentDict module lead successfully')
48+
else:
49+
raise FileNotFoundError('Referent Dict file not found or not in path..')
50+
4651
super(ReferentDictRelationExtraction, self).__init__()
4752

4853
def extract_all_relations(self, mention_x: MentionDataLight,

nlp_architect/data/cdc_resources/relations/verbocean_relation_extraction.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# ******************************************************************************
1616

1717
import logging
18+
import os
1819
from typing import Dict, Set
1920

2021
from nlp_architect.common.cdc.mention_data import MentionDataLight
@@ -38,11 +39,14 @@ def __init__(self, method: OnlineOROfflineMethod, vo_file: str):
3839
vo_file (required): str Location of VerbOcean file to work with
3940
"""
4041
logger.info('Loading Verb Ocean module')
41-
if method == OnlineOROfflineMethod.OFFLINE:
42-
self.vo = load_json_file(vo_file)
43-
elif method == OnlineOROfflineMethod.ONLINE:
44-
self.vo = self.load_verbocean_file(vo_file)
45-
logger.info('Verb Ocean module lead successfully')
42+
if vo_file is not None and os.path.isfile(vo_file):
43+
if method == OnlineOROfflineMethod.OFFLINE:
44+
self.vo = load_json_file(vo_file)
45+
elif method == OnlineOROfflineMethod.ONLINE:
46+
self.vo = self.load_verbocean_file(vo_file)
47+
logger.info('Verb Ocean module lead successfully')
48+
else:
49+
raise FileNotFoundError('VerbOcean file not found or not in path..')
4650
super(VerboceanRelationExtraction, self).__init__()
4751

4852
def extract_all_relations(self, mention_x: MentionDataLight,

nlp_architect/data/cdc_resources/relations/wikipedia_relation_extraction.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from __future__ import division
1818

1919
import logging
20+
import os
2021
from typing import Set, List
2122

2223
from nlp_architect.common.cdc.mention_data import MentionDataLight
@@ -50,13 +51,19 @@ def __init__(self, method: WikipediaSearchMethod, wiki_file: str = None, host: s
5051
index (required on Elastic mode): int the Elastic search index name
5152
"""
5253
logger.info('Loading Wikipedia module')
54+
5355
connectivity = method
5456
if connectivity == WikipediaSearchMethod.ONLINE:
5557
self.pywiki_impl = WikiOnline()
5658
elif connectivity == WikipediaSearchMethod.OFFLINE:
57-
self.pywiki_impl = WikiOffline(wiki_file)
59+
if wiki_file is not None and os.path.isdir(wiki_file):
60+
self.pywiki_impl = WikiOffline(wiki_file)
61+
else:
62+
raise FileNotFoundError('Wikipedia resource file not found or not in path, '
63+
'create it or change the initialization method')
5864
elif connectivity == WikipediaSearchMethod.ELASTIC:
5965
self.pywiki_impl = WikiElastic(host, port, index)
66+
6067
logger.info('Wikipedia module lead successfully')
6168
super(WikipediaRelationExtraction, self).__init__()
6269

nlp_architect/data/cdc_resources/relations/within_doc_coref_extraction.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,17 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
# ******************************************************************************
16+
import logging
17+
import os
1618
from typing import List, Set
1719

1820
from nlp_architect.common.cdc.mention_data import MentionData
1921
from nlp_architect.data.cdc_resources.relations.relation_extraction import RelationExtraction
2022
from nlp_architect.data.cdc_resources.relations.relation_types_enums import RelationType
2123
from nlp_architect.utils.io import load_json_file
2224

25+
logger = logging.getLogger(__name__)
26+
2327

2428
class WithinDocCoref(RelationExtraction):
2529
def __init__(self, wd_file: str):
@@ -29,8 +33,12 @@ def __init__(self, wd_file: str):
2933
Args:
3034
wd_file (required): str Location of within doc co-reference mentions file
3135
"""
32-
wd_mentions_json = load_json_file(wd_file)
33-
self.within_doc_coref_chain = self.arrange_resource(wd_mentions_json)
36+
logger.info('Loading Within doc resource')
37+
if wd_file is not None and os.path.isfile(wd_file):
38+
wd_mentions_json = load_json_file(wd_file)
39+
self.within_doc_coref_chain = self.arrange_resource(wd_mentions_json)
40+
else:
41+
raise FileNotFoundError('Within-doc resource file not found or not in path')
3442
super(WithinDocCoref, self).__init__()
3543

3644
@staticmethod

nlp_architect/data/cdc_resources/relations/wordnet_relation_extraction.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# ******************************************************************************
1616

1717
import logging
18+
import os
1819
from typing import Set, List
1920

2021
from nlp_architect.common.cdc.mention_data import MentionDataLight
@@ -44,7 +45,11 @@ def __init__(self, method: OnlineOROfflineMethod, wn_file: str = None):
4445
if self.connectivity == OnlineOROfflineMethod.ONLINE:
4546
self.wordnet_impl = WordnetOnline()
4647
elif self.connectivity == OnlineOROfflineMethod.OFFLINE:
47-
self.wordnet_impl = WordnetOffline(wn_file)
48+
if wn_file is not None and os.path.isdir(wn_file):
49+
self.wordnet_impl = WordnetOffline(wn_file)
50+
else:
51+
raise FileNotFoundError('WordNet resource directory not found or not in path')
52+
4853
logger.info('Wordnet module lead successfully')
4954
super(WordnetRelationExtraction, self).__init__()
5055

nlp_architect/models/cross_doc_coref/cdc_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def __init__(self):
9292
]
9393

9494
self.gold_mentions_file = LIBRARY_ROOT + \
95-
'/datasets/cdc/gold_mentions/ecb_test_event_gold_mentions.json'
95+
'/datasets/ecb/ecb_all_event_mentions.json'
9696

9797

9898
class EntityConfig(CDCConfig):
@@ -120,4 +120,4 @@ def __init__(self):
120120
]
121121

122122
self.gold_mentions_file = LIBRARY_ROOT + \
123-
'/datasets/cdc/gold_mentions/ecb_test_entity_gold_mentions.json'
123+
'/datasets/ecb/ecb_all_entity_mentions.json'

nlp_architect/utils/resources/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)