Skip to content

Commit 69e2b53

Browse files
Provide full Mention objects, not just the content. (#3156)
Fixes #3149.
1 parent ebb77fb commit 69e2b53

File tree

5 files changed

+190
-16
lines changed

5 files changed

+190
-16
lines changed

language/google/cloud/language/entity.py

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,80 @@ class EntityType(object):
4646
"""Other entity type (i.e. known but not classified)."""
4747

4848

49+
class MentionType(object):
    """List of possible mention types.

    Each attribute is a plain string constant whose value equals its name.
    """

    TYPE_UNKNOWN = 'TYPE_UNKNOWN'
    """Unknown mention type."""

    PROPER = 'PROPER'
    """Proper name."""

    COMMON = 'COMMON'
    """Common noun (or noun compound)."""
60+
61+
62+
class Mention(object):
    """A Google Cloud Natural Language API mention.

    Represents a mention for an entity in the text. Currently, proper noun
    mentions are supported.

    :type text: :class:`TextSpan`
    :param text: The span of text in which the entity is mentioned.

    :type mention_type: str
    :param mention_type: The type of the mention (see
                         :class:`MentionType`).
    """
    def __init__(self, text, mention_type):
        self.text = text
        self.mention_type = mention_type

    def __str__(self):
        """Return the string representation of this Mention.

        :rtype: str
        :returns: The result of calling ``str()`` on the mention's text.
        """
        return str(self.text)

    @classmethod
    def from_api_repr(cls, payload):
        """Convert a Mention from the JSON API into a :class:`Mention`.

        :type payload: dict
        :param payload: The value from the backend. The ``text`` and
                        ``type`` keys are read from it.

        :rtype: :class:`Mention`
        :returns: The mention parsed from the API representation.

        :raises KeyError: If ``payload`` has no ``text`` or ``type`` key.
        """
        # The nested ``text`` mapping is parsed into its own TextSpan so
        # that the offset is kept alongside the content.
        text = TextSpan.from_api_repr(payload['text'])
        mention_type = payload['type']
        return cls(text, mention_type)
88+
89+
90+
class TextSpan(object):
    """A span of text from Google Cloud Natural Language API.

    Represents a word or phrase of text, as well as its offset
    from the original document.

    :type content: str
    :param content: The text of the span.

    :type begin_offset: int
    :param begin_offset: The offset of the span from the original document,
                         as reported by the API.
    """
    def __init__(self, content, begin_offset):
        self.content = content
        self.begin_offset = begin_offset

    def __str__(self):
        """Return the string representation of this TextSpan.

        :rtype: str
        :returns: The text content
        """
        return self.content

    @classmethod
    def from_api_repr(cls, payload):
        """Convert a TextSpan from the JSON API into a :class:`TextSpan`.

        :type payload: dict
        :param payload: The value from the backend. The ``content`` and
                        ``beginOffset`` keys are read from it.

        :rtype: :class:`TextSpan`
        :returns: The text span parsed from the API representation.

        :raises KeyError: If ``payload`` has no ``content`` or
                          ``beginOffset`` key.
        """
        content = payload['content']
        begin_offset = payload['beginOffset']
        return cls(content=content, begin_offset=begin_offset)
121+
122+
49123
class Entity(object):
50124
"""A Google Cloud Natural Language API entity.
51125
@@ -101,6 +175,5 @@ def from_api_repr(cls, payload):
101175
entity_type = payload['type']
102176
metadata = payload['metadata']
103177
salience = payload['salience']
104-
mentions = [value['text']['content']
105-
for value in payload['mentions']]
178+
mentions = [Mention.from_api_repr(val) for val in payload['mentions']]
106179
return cls(name, entity_type, metadata, salience, mentions)

language/unit_tests/test_api_responses.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818

1919
class TestEntityResponse(unittest.TestCase):
2020
ENTITY_DICT = {
21-
'mentions': [{'text': {'content': 'Italian'}}],
21+
'mentions': [{
22+
'text': {'content': 'Italian', 'beginOffset': 0},
23+
'type': 'PROPER',
24+
}],
2225
'metadata': {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'},
2326
'name': 'Italian',
2427
'salience': 0.15,
@@ -46,12 +49,14 @@ def test_api_repr_factory(self):
4649

4750
def _verify_entity_response(self, entity_response):
4851
from google.cloud.language.entity import EntityType
52+
from google.cloud.language.entity import Mention
4953

5054
self.assertEqual(len(entity_response.entities), 1)
5155
entity = entity_response.entities[0]
5256
self.assertEqual(entity.name, 'Italian')
5357
self.assertEqual(len(entity.mentions), 1)
54-
self.assertEqual(entity.mentions[0], 'Italian')
58+
self.assertIsInstance(entity.mentions[0], Mention)
59+
self.assertEqual(str(entity.mentions[0]), 'Italian')
5560
self.assertTrue(entity.metadata['wikipedia_url'].endswith('Italy'))
5661
self.assertAlmostEqual(entity.salience, 0.15)
5762
self.assertEqual(entity.entity_type, EntityType.LOCATION)

language/unit_tests/test_document.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ def _get_entities(include_entities):
8484
'text': {
8585
'content': ANNOTATE_NAME,
8686
'beginOffset': -1
87-
}
87+
},
88+
'type': 'TYPE_UNKNOWN',
8889
}
8990
]
9091
},
@@ -215,7 +216,8 @@ def _verify_entity(self, entity, name, entity_type, wiki_url, salience):
215216
else:
216217
self.assertEqual(entity.metadata, {})
217218
self.assertEqual(entity.salience, salience)
218-
self.assertEqual(entity.mentions, [name])
219+
self.assertEqual(len(entity.mentions), 1)
220+
self.assertEqual(entity.mentions[0].text.content, name)
219221

220222
@staticmethod
221223
def _expected_data(content, encoding_type=None,
@@ -265,7 +267,8 @@ def test_analyze_entities(self):
265267
'text': {
266268
'content': name1,
267269
'beginOffset': -1
268-
}
270+
},
271+
'type': 'TYPE_UNKNOWN',
269272
}
270273
]
271274
},
@@ -280,6 +283,7 @@ def test_analyze_entities(self):
280283
'content': name2,
281284
'beginOffset': -1,
282285
},
286+
'type': 'PROPER',
283287
},
284288
],
285289
},

language/unit_tests/test_entity.py

Lines changed: 98 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ def _make_one(self, *args, **kw):
2727
return self._get_target_class()(*args, **kw)
2828

2929
def test_constructor_defaults(self):
30+
from google.cloud.language.entity import Mention
31+
from google.cloud.language.entity import MentionType
32+
from google.cloud.language.entity import TextSpan
33+
3034
name = 'Italian'
3135
entity_type = 'LOCATION'
3236
wiki_url = 'http://en.wikipedia.org/wiki/Italy'
@@ -35,7 +39,10 @@ def test_constructor_defaults(self):
3539
'wikipedia_url': wiki_url,
3640
}
3741
salience = 0.19960518
38-
mentions = ['Italian']
42+
mentions = [Mention(
43+
mention_type=MentionType.PROPER,
44+
text=TextSpan(content='Italian', begin_offset=0),
45+
)]
3946
entity = self._make_one(name, entity_type, metadata,
4047
salience, mentions)
4148
self.assertEqual(entity.name, name)
@@ -45,9 +52,13 @@ def test_constructor_defaults(self):
4552
self.assertEqual(entity.mentions, mentions)
4653

4754
def test_from_api_repr(self):
55+
from google.cloud.language.entity import EntityType
56+
from google.cloud.language.entity import Mention
57+
from google.cloud.language.entity import MentionType
58+
4859
klass = self._get_target_class()
4960
name = 'Italy'
50-
entity_type = 'LOCATION'
61+
entity_type = EntityType.LOCATION
5162
salience = 0.223
5263
wiki_url = 'http://en.wikipedia.org/wiki/Italy'
5364
mention1 = 'Italy'
@@ -59,14 +70,95 @@ def test_from_api_repr(self):
5970
'salience': salience,
6071
'metadata': {'wikipedia_url': wiki_url},
6172
'mentions': [
62-
{'text': {'content': mention1}},
63-
{'text': {'content': mention2}},
64-
{'text': {'content': mention3}},
73+
{'text': {'content': mention1, 'beginOffset': 3},
74+
'type': 'PROPER'},
75+
{'text': {'content': mention2, 'beginOffset': 5},
76+
'type': 'PROPER'},
77+
{'text': {'content': mention3, 'beginOffset': 8},
78+
'type': 'PROPER'},
6579
],
6680
}
6781
entity = klass.from_api_repr(payload)
6882
self.assertEqual(entity.name, name)
6983
self.assertEqual(entity.entity_type, entity_type)
7084
self.assertEqual(entity.salience, salience)
7185
self.assertEqual(entity.metadata, {'wikipedia_url': wiki_url})
72-
self.assertEqual(entity.mentions, [mention1, mention2, mention3])
86+
87+
# Assert that we got back Mention objects for each mention.
88+
self.assertIsInstance(entity.mentions[0], Mention)
89+
self.assertIsInstance(entity.mentions[1], Mention)
90+
self.assertIsInstance(entity.mentions[2], Mention)
91+
92+
# Assert that the text (and string coercion) are correct.
93+
self.assertEqual([str(i) for i in entity.mentions],
94+
[mention1, mention2, mention3])
95+
96+
# Assert that the begin offsets are preserved.
97+
self.assertEqual([i.text.begin_offset for i in entity.mentions],
98+
[3, 5, 8])
99+
100+
# Assert that the mention types are preserved.
101+
for mention in entity.mentions:
102+
self.assertEqual(mention.mention_type, MentionType.PROPER)
103+
104+
105+
class TestMention(unittest.TestCase):
    """Unit tests for ``google.cloud.language.entity.Mention``."""

    # API-shaped mention payload shared by the ``from_api_repr`` based tests.
    PAYLOAD = {
        'text': {'content': 'Greece', 'beginOffset': 42},
        'type': 'PROPER',
    }

    @staticmethod
    def _get_target_class():
        """Import (lazily) and return the class under test."""
        from google.cloud.language.entity import Mention

        return Mention

    def _make_one(self, *args, **kw):
        """Construct an instance of the class under test."""
        return self._get_target_class()(*args, **kw)

    def test_constructor(self):
        from google.cloud.language.entity import MentionType
        from google.cloud.language.entity import TextSpan

        mention = self._make_one(
            text=TextSpan(content='snails', begin_offset=90),
            mention_type=MentionType.COMMON,
        )

        self.assertIsInstance(mention.text, TextSpan)
        self.assertEqual(mention.text.content, 'snails')
        self.assertEqual(mention.text.begin_offset, 90)
        self.assertEqual(mention.mention_type, MentionType.COMMON)

    def test_from_api_repr(self):
        from google.cloud.language.entity import MentionType
        from google.cloud.language.entity import TextSpan

        klass = self._get_target_class()
        mention = klass.from_api_repr(self.PAYLOAD)

        self.assertIsInstance(mention, klass)
        self.assertIsInstance(mention.text, TextSpan)
        self.assertEqual(mention.text.content, 'Greece')
        self.assertEqual(mention.text.begin_offset, 42)
        self.assertEqual(mention.mention_type, MentionType.PROPER)

    def test_dunder_str(self):
        klass = self._get_target_class()

        mention = klass.from_api_repr(self.PAYLOAD)
        self.assertEqual(str(mention), 'Greece')
144+
145+
146+
class TestTextSpan(unittest.TestCase):
    """Unit tests for ``google.cloud.language.entity.TextSpan``."""

    @staticmethod
    def _get_target_class():
        """Import (lazily) and return the class under test."""
        from google.cloud.language.entity import TextSpan

        return TextSpan

    def _make_one(self, *args, **kw):
        """Construct an instance of the class under test."""
        return self._get_target_class()(*args, **kw)

    def test_constructor(self):
        text = self._make_one(content='Winston Churchill', begin_offset=1945)
        self.assertIsInstance(text, self._get_target_class())
        # ``assertEqual`` takes (first, second, msg): the expected literal
        # must be the second argument, otherwise it is only used as the
        # failure message and never compared.
        self.assertEqual(text.content, 'Winston Churchill')
        self.assertEqual(str(text), 'Winston Churchill')
        self.assertEqual(text.begin_offset, 1945)

    def test_from_api_repr(self):
        klass = self._get_target_class()

        text = klass.from_api_repr({
            'beginOffset': 1953,
            'content': 'Queen Elizabeth',
        })
        self.assertIsInstance(text, klass)
        # Compare both the attribute and the string coercion against the
        # expected literal (see the note in ``test_constructor``).
        self.assertEqual(text.content, 'Queen Elizabeth')
        self.assertEqual(str(text), 'Queen Elizabeth')
        self.assertEqual(text.begin_offset, 1953)

system_tests/language.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,15 @@ def _check_analyze_entities_result(self, entities):
7575
self.assertEqual(entity1.entity_type, EntityType.PERSON)
7676
self.assertGreater(entity1.salience, 0.0)
7777
# Other mentions may occur, e.g. "painter".
78-
self.assertIn(entity1.name, entity1.mentions)
78+
self.assertIn(entity1.name, [str(i) for i in entity1.mentions])
7979
self.assertEqual(entity1.metadata['wikipedia_url'],
8080
'http://en.wikipedia.org/wiki/Caravaggio')
8181
self.assertIsInstance(entity1.metadata, dict)
8282
# Verify entity 2.
8383
self.assertEqual(entity2.name, self.NAME2)
8484
self.assertEqual(entity2.entity_type, EntityType.LOCATION)
8585
self.assertGreater(entity2.salience, 0.0)
86-
self.assertEqual(entity2.mentions, [entity2.name])
86+
self.assertEqual([str(i) for i in entity2.mentions], [entity2.name])
8787
self.assertEqual(entity2.metadata['wikipedia_url'],
8888
'http://en.wikipedia.org/wiki/Italy')
8989
self.assertIsInstance(entity2.metadata, dict)
@@ -92,7 +92,7 @@ def _check_analyze_entities_result(self, entities):
9292
choices = (EntityType.EVENT, EntityType.WORK_OF_ART)
9393
self.assertIn(entity3.entity_type, choices)
9494
self.assertGreater(entity3.salience, 0.0)
95-
self.assertEqual(entity3.mentions, [entity3.name])
95+
self.assertEqual([str(i) for i in entity3.mentions], [entity3.name])
9696
wiki_url = ('http://en.wikipedia.org/wiki/'
9797
'The_Calling_of_St_Matthew_(Caravaggio)')
9898
self.assertEqual(entity3.metadata['wikipedia_url'], wiki_url)

0 commit comments

Comments
 (0)