Skip to content

Commit 6532857

Browse files
ctuftslmazuel
authored and committed
updated python examples to use the sdk and match the updated examples… (#41)
* updated python examples to use the sdk and match the updated examples present in the quickstart * Update text_analytics_samples.py
1 parent 40a2619 commit 6532857

File tree

1 file changed

+75
-112
lines changed

1 file changed

+75
-112
lines changed

samples/language/text_analytics_samples.py

Lines changed: 75 additions & 112 deletions
Original file line numberDiff line numberDiff line change
# -*- coding: utf-8 -*-

import os

from azure.cognitiveservices.language.textanalytics import TextAnalyticsClient
from msrest.authentication import CognitiveServicesCredentials

# Environment variable holding the Azure Cognitive Services subscription key.
SUBSCRIPTION_KEY_ENV_NAME = "TEXTANALYTICS_SUBSCRIPTION_KEY"
# Azure region for the Text Analytics endpoint; defaults to "westcentralus"
# when TEXTANALYTICS_LOCATION is not set in the environment.
TEXTANALYTICS_LOCATION = os.environ.get(
    "TEXTANALYTICS_LOCATION", "westcentralus")
1212
def language_extraction(subscription_key):
    """Language extraction.

    This example detects the language of several strings by sending them
    to the Text Analytics ``detect_language`` operation and printing the
    top detected language for each document.

    Args:
        subscription_key (str): Azure Cognitive Services subscription key.
    """
    credentials = CognitiveServicesCredentials(subscription_key)
    # Endpoint is derived from the configured region, e.g.
    # https://westcentralus.api.cognitive.microsoft.com
    text_analytics_url = "https://{}.api.cognitive.microsoft.com".format(
        TEXTANALYTICS_LOCATION)
    text_analytics = TextAnalyticsClient(
        endpoint=text_analytics_url, credentials=credentials)

    try:
        # Each document carries a caller-chosen id so results can be
        # correlated with inputs in the response.
        documents = [
            {'id': '1', 'text': 'This is a document written in English.'},
            {'id': '2', 'text': 'Este es un document escrito en Español.'},
            {'id': '3', 'text': '这是一个用中文写的文件'}
        ]
        response = text_analytics.detect_language(documents=documents)

        for document in response.documents:
            # detected_languages is ordered; [0] is the best match.
            print("Document Id: ", document.id, ", Language: ",
                  document.detected_languages[0].name)

    except Exception as err:
        # Sample code: report any failure rather than crash the demo runner.
        print("Encountered exception. {}".format(err))
@@ -44,41 +39,35 @@ def language_extraction(subscription_key):
4439
def key_phrases(subscription_key):
    """Key-phrases.

    Returns the key talking points in several text examples by calling the
    Text Analytics ``key_phrases`` operation and printing the phrases found
    in each document.

    Args:
        subscription_key (str): Azure Cognitive Services subscription key.
    """
    credentials = CognitiveServicesCredentials(subscription_key)
    # Endpoint is derived from the configured region.
    text_analytics_url = "https://{}.api.cognitive.microsoft.com".format(
        TEXTANALYTICS_LOCATION)
    text_analytics = TextAnalyticsClient(
        endpoint=text_analytics_url, credentials=credentials)

    try:
        # Unlike detect_language, each document declares its language
        # up front so the service can extract phrases correctly.
        documents = [
            {"id": "1", "language": "ja", "text": "猫は幸せ"},
            {"id": "2", "language": "de",
             "text": "Fahrt nach Stuttgart und dann zum Hotel zu Fu."},
            {"id": "3", "language": "en",
             "text": "My cat might need to see a veterinarian."},
            {"id": "4", "language": "es", "text": "A mi me encanta el fútbol!"}
        ]

        # Echo the inputs before issuing the request.
        for document in documents:
            print(
                "Asking key-phrases on '{}' (id: {})".format(document['text'], document['id']))

        response = text_analytics.key_phrases(documents=documents)

        for document in response.documents:
            print("Document Id: ", document.id)
            print("\tKey Phrases:")
            for phrase in document.key_phrases:
                print("\t\t", phrase)

    except Exception as err:
        # Sample code: report any failure rather than crash the demo runner.
        print("Encountered exception. {}".format(err))
@@ -89,37 +78,26 @@ def sentiment(subscription_key):
8978
9079
Scores close to 1 indicate positive sentiment, while scores close to 0 indicate negative sentiment.
9180
"""
92-
endpoint = "https://{}.api.cognitive.microsoft.com".format(TEXTANALYTICS_LOCATION)
93-
client = TextAnalyticsClient(endpoint=endpoint, credentials=CognitiveServicesCredentials(subscription_key))
81+
credentials = CognitiveServicesCredentials(subscription_key)
82+
text_analytics_url = "https://{}.api.cognitive.microsoft.com".format(
83+
TEXTANALYTICS_LOCATION)
84+
text_analytics = TextAnalyticsClient(
85+
endpoint=text_analytics_url, credentials=credentials)
9486

9587
try:
96-
documents = [{
97-
'language': 'en',
98-
'id': 0,
99-
'text': "I had the best day of my life."
100-
}, {
101-
'language': 'en',
102-
'id': 1,
103-
'text': "This was a waste of my time. The speaker put me to sleep."
104-
}, {
105-
'language': 'es',
106-
'id': 2,
107-
'text': "No tengo dinero ni nada que dar..."
108-
}, {
109-
'language': 'it',
110-
'id': 3,
111-
'text': "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura."
112-
}]
113-
114-
for document in documents:
115-
print("Asking sentiment on '{}' (id: {})".format(document['text'], document['id']))
116-
117-
response = client.sentiment(
118-
documents=documents
119-
)
120-
88+
documents = [
89+
{"id": "1", "language": "en", "text": "I had the best day of my life."},
90+
{"id": "2", "language": "en",
91+
"text": "This was a waste of my time. The speaker put me to sleep."},
92+
{"id": "3", "language": "es", "text": "No tengo dinero ni nada que dar..."},
93+
{"id": "4", "language": "it",
94+
"text": "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura."}
95+
]
96+
97+
response = text_analytics.sentiment(documents=documents)
12198
for document in response.documents:
122-
print("Found out that in document {}, sentimet score is {}:".format(document.id, document.score))
99+
print("Document Id: ", document.id, ", Sentiment Score: ",
100+
"{:.2f}".format(document.score))
123101

124102
except Exception as err:
125103
print("Encountered exception. {}".format(err))
@@ -128,54 +106,39 @@ def sentiment(subscription_key):
128106
def entity_extraction(subscription_key):
    """EntityExtraction.

    Extracts the entities from sentences and prints out their properties.
    Calls the Text Analytics ``entities`` operation and, for every entity
    found, prints its name, type, sub-type, and each match's offset,
    length, and score.

    Args:
        subscription_key (str): Azure Cognitive Services subscription key.
    """
    credentials = CognitiveServicesCredentials(subscription_key)
    # Endpoint is derived from the configured region.
    text_analytics_url = "https://{}.api.cognitive.microsoft.com".format(
        TEXTANALYTICS_LOCATION)
    text_analytics = TextAnalyticsClient(
        endpoint=text_analytics_url, credentials=credentials)

    try:
        documents = [
            {"id": "1", "language": "en", "text": "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800."},
            {"id": "2", "language": "es",
             "text": "La sede principal de Microsoft se encuentra en la ciudad de Redmond, a 21 kilómetros de Seattle."}
        ]
        response = text_analytics.entities(documents=documents)

        for document in response.documents:
            print("Document Id: ", document.id)
            print("\tKey Entities:")
            for entity in document.entities:
                print("\t\t", "NAME: ", entity.name, "\tType: ",
                      entity.type, "\tSub-type: ", entity.sub_type)
                # An entity may occur multiple times in one document;
                # each occurrence is reported as a separate match.
                for match in entity.matches:
                    print("\t\t\tOffset: ", match.offset, "\tLength: ", match.length, "\tScore: ",
                          "{:.2f}".format(match.entity_type_score))

    except Exception as err:
        # Sample code: report any failure rather than crash the demo runner.
        print("Encountered exception. {}".format(err))
175137

176138

177139
if __name__ == "__main__":
178-
import sys, os.path
140+
import sys
141+
import os.path
179142

180143
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..")))
181144
from tools import execute_samples

0 commit comments

Comments
 (0)