11# -*- coding: utf-8 -*-
22
33import os
4-
54from azure .cognitiveservices .language .textanalytics import TextAnalyticsClient
65from msrest .authentication import CognitiveServicesCredentials
76
# Name of an environment variable; presumably the one holding the Text
# Analytics subscription key -- it is consumed outside this section, confirm
# against the caller.
SUBSCRIPTION_KEY_ENV_NAME = "TEXTANALYTICS_SUBSCRIPTION_KEY"

# Location string interpolated into the service endpoint host name
# ("https://<location>.api.cognitive.microsoft.com"). Read from the
# environment, falling back to "westcentralus" when the variable is unset.
TEXTANALYTICS_LOCATION = os.environ.get(
    "TEXTANALYTICS_LOCATION", "westcentralus")
1010
1111
def language_extraction(subscription_key):
    """Language extraction.

    This example detects the language of several strings.

    Builds a ``TextAnalyticsClient`` for the endpoint derived from
    ``TEXTANALYTICS_LOCATION``, submits three sample documents to
    ``detect_language`` and prints, for each returned document, its id and
    the name of the first detected language.

    Args:
        subscription_key: Key wrapped in ``CognitiveServicesCredentials`` and
            handed to the client.

    Returns:
        None. Results are printed. Any exception raised by the service call
        or while printing results is caught and printed instead of being
        propagated; client construction happens outside that guard.
    """
    credentials = CognitiveServicesCredentials(subscription_key)
    text_analytics_url = "https://{}.api.cognitive.microsoft.com".format(
        TEXTANALYTICS_LOCATION)
    text_analytics = TextAnalyticsClient(
        endpoint=text_analytics_url, credentials=credentials)

    try:
        # Document ids are strings; no language hint is supplied because
        # detecting the language is the point of the call.
        documents = [
            {'id': '1', 'text': 'This is a document written in English.'},
            {'id': '2', 'text': 'Este es un document escrito en Español.'},
            {'id': '3', 'text': '这是一个用中文写的文件'}
        ]
        response = text_analytics.detect_language(documents=documents)

        for document in response.documents:
            # Only the first entry of detected_languages is reported.
            print("Document Id: ", document.id, ", Language: ",
                  document.detected_languages[0].name)

    except Exception as err:
        # Sample-level boundary: report the failure and carry on.
        print("Encountered exception. {}".format(err))
@@ -44,41 +39,35 @@ def language_extraction(subscription_key):
def key_phrases(subscription_key):
    """Key-phrases.

    Returns the key talking points in several text examples.

    Builds a ``TextAnalyticsClient`` for the endpoint derived from
    ``TEXTANALYTICS_LOCATION``, announces each of the four sample documents,
    submits them to ``key_phrases`` and prints every phrase returned for each
    document.

    Args:
        subscription_key: Key wrapped in ``CognitiveServicesCredentials`` and
            handed to the client.

    Returns:
        None. Results are printed. Any exception raised by the service call
        or while printing is caught and printed instead of being propagated;
        client construction happens outside that guard.
    """
    credentials = CognitiveServicesCredentials(subscription_key)
    text_analytics_url = "https://{}.api.cognitive.microsoft.com".format(
        TEXTANALYTICS_LOCATION)
    text_analytics = TextAnalyticsClient(
        endpoint=text_analytics_url, credentials=credentials)

    try:
        # Each document carries an explicit language code alongside its text.
        documents = [
            {"id": "1", "language": "ja", "text": "猫は幸せ"},
            {"id": "2", "language": "de",
             "text": "Fahrt nach Stuttgart und dann zum Hotel zu Fu."},
            {"id": "3", "language": "en",
             "text": "My cat might need to see a veterinarian."},
            {"id": "4", "language": "es", "text": "A mi me encanta el fútbol!"}
        ]

        # Echo what is about to be sent before making the request.
        for document in documents:
            print(
                "Asking key-phrases on '{}' (id: {})".format(
                    document['text'], document['id']))

        response = text_analytics.key_phrases(documents=documents)

        for document in response.documents:
            print("Document Id: ", document.id)
            print("\t Key Phrases:")
            for phrase in document.key_phrases:
                print("\t \t ", phrase)

    except Exception as err:
        # Sample-level boundary: report the failure and carry on.
        print("Encountered exception. {}".format(err))
@@ -89,37 +78,26 @@ def sentiment(subscription_key):
8978
9079 Scores close to 1 indicate positive sentiment, while scores close to 0 indicate negative sentiment.
9180 """
92- endpoint = "https://{}.api.cognitive.microsoft.com" .format (TEXTANALYTICS_LOCATION )
93- client = TextAnalyticsClient (endpoint = endpoint , credentials = CognitiveServicesCredentials (subscription_key ))
81+ credentials = CognitiveServicesCredentials (subscription_key )
82+ text_analytics_url = "https://{}.api.cognitive.microsoft.com" .format (
83+ TEXTANALYTICS_LOCATION )
84+ text_analytics = TextAnalyticsClient (
85+ endpoint = text_analytics_url , credentials = credentials )
9486
9587 try :
96- documents = [{
97- 'language' : 'en' ,
98- 'id' : 0 ,
99- 'text' : "I had the best day of my life."
100- }, {
101- 'language' : 'en' ,
102- 'id' : 1 ,
103- 'text' : "This was a waste of my time. The speaker put me to sleep."
104- }, {
105- 'language' : 'es' ,
106- 'id' : 2 ,
107- 'text' : "No tengo dinero ni nada que dar..."
108- }, {
109- 'language' : 'it' ,
110- 'id' : 3 ,
111- 'text' : "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura."
112- }]
113-
114- for document in documents :
115- print ("Asking sentiment on '{}' (id: {})" .format (document ['text' ], document ['id' ]))
116-
117- response = client .sentiment (
118- documents = documents
119- )
120-
88+ documents = [
89+ {"id" : "1" , "language" : "en" , "text" : "I had the best day of my life." },
90+ {"id" : "2" , "language" : "en" ,
91+ "text" : "This was a waste of my time. The speaker put me to sleep." },
92+ {"id" : "3" , "language" : "es" , "text" : "No tengo dinero ni nada que dar..." },
93+ {"id" : "4" , "language" : "it" ,
94+ "text" : "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura." }
95+ ]
96+
97+ response = text_analytics .sentiment (documents = documents )
12198 for document in response .documents :
122- print ("Found out that in document {}, sentimet score is {}:" .format (document .id , document .score ))
99+ print ("Document Id: " , document .id , ", Sentiment Score: " ,
100+ "{:.2f}" .format (document .score ))
123101
124102 except Exception as err :
125103 print ("Encountered exception. {}" .format (err ))
@@ -128,54 +106,39 @@ def sentiment(subscription_key):
def entity_extraction(subscription_key):
    """EntityExtraction.

    Extracts the entities from sentences and prints out their properties.

    Builds a ``TextAnalyticsClient`` for the endpoint derived from
    ``TEXTANALYTICS_LOCATION``, submits two sample documents (English and
    Spanish) to ``entities`` and prints, for every entity of every returned
    document, its name, type and sub-type followed by the offset, length and
    score of each match.

    Args:
        subscription_key: Key wrapped in ``CognitiveServicesCredentials`` and
            handed to the client.

    Returns:
        None. Results are printed. Any exception raised by the service call
        or while printing is caught and printed instead of being propagated;
        client construction happens outside that guard.
    """
    credentials = CognitiveServicesCredentials(subscription_key)
    text_analytics_url = "https://{}.api.cognitive.microsoft.com".format(
        TEXTANALYTICS_LOCATION)
    text_analytics = TextAnalyticsClient(
        endpoint=text_analytics_url, credentials=credentials)

    try:
        documents = [
            {"id": "1", "language": "en",
             "text": "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800."},
            {"id": "2", "language": "es",
             "text": "La sede principal de Microsoft se encuentra en la ciudad de Redmond, a 21 kilómetros de Seattle."}
        ]
        response = text_analytics.entities(documents=documents)

        for document in response.documents:
            print("Document Id: ", document.id)
            print("\t Key Entities:")
            for entity in document.entities:
                print("\t \t ", "NAME: ", entity.name, "\t Type: ",
                      entity.type, "\t Sub-type: ", entity.sub_type)
                # One line per occurrence of the entity in the text.
                for match in entity.matches:
                    print("\t \t \t Offset: ", match.offset,
                          "\t Length: ", match.length, "\t Score: ",
                          "{:.2f}".format(match.entity_type_score))

    except Exception as err:
        # Sample-level boundary: report the failure and carry on.
        print("Encountered exception. {}".format(err))
175137
176138
177139if __name__ == "__main__" :
178- import sys , os .path
140+ import sys
141+ import os .path
179142
180143 sys .path .append (os .path .abspath (os .path .join (__file__ , ".." , ".." )))
181144 from tools import execute_samples
0 commit comments