-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWordNetPT.py
More file actions
85 lines (45 loc) · 1.54 KB
/
WordNetPT.py
File metadata and controls
85 lines (45 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 29 23:23:30 2020
@author: Eduardo Vicente
"""
import requests, ast
def encode_aux(string):
    """Repair text whose UTF-8 bytes were mis-read as Latin-1 (mojibake).

    The text is re-encoded as Latin-1 to recover the underlying bytes, and
    those bytes are then decoded as UTF-8 (e.g. ``'moÃ§a'`` -> ``'moça'``).

    Args:
        string: Text to repair.

    Returns:
        The repaired text. If ``string`` cannot be a Latin-1 view of UTF-8
        bytes (it holds characters outside Latin-1, or the recovered bytes
        are not valid UTF-8) it is returned unchanged instead of raising.
    """
    try:
        return string.encode('latin1').decode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not mojibake (already correct, or not representable in Latin-1):
        # leave the text as it is rather than aborting the caller.
        return string
def remove_duplicates(lista):
    """Return the items of ``lista`` without repeats, in first-seen order.

    Args:
        lista: Iterable of hashable items.

    Returns:
        A new list holding each distinct item once, positioned where it
        first appeared in ``lista``.
    """
    seen = set()
    unique = []
    for item in lista:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
def process_content(content):
    """Parse a raw response body that holds a Python-style literal.

    The body is evaluated with ``ast.literal_eval``, so it must be a plain
    literal such as ``["a", "b"]`` or ``[1, 2]``.

    Args:
        content: Response body, normally ``bytes`` (e.g. ``Response.content``).
            A ``str`` is also accepted and parsed as-is.

    Returns:
        The evaluated literal, or ``[]`` when the body is empty or blank.

    Raises:
        ValueError, SyntaxError: If the body is not a valid literal.
    """
    if isinstance(content, (bytes, bytearray)):
        # Latin-1 maps every byte to the code point with the same value.
        # Non-ASCII UTF-8 text therefore comes out as mojibake on purpose:
        # that is the form encode_aux() expects and turns back into UTF-8.
        # Decoding directly (instead of slicing the repr of the bytes) also
        # keeps apostrophes and surrounding newlines from breaking parsing.
        text = content.decode('latin-1')
    else:
        text = str(content)

    text = text.strip()
    if not text:
        return []
    return ast.literal_eval(text)
def synsets(word, timeout=10):
    """Collect the entries of every wordnet.pt synset matched by ``word``.

    The search endpoint is queried for ``word``; each item it returns is then
    requested from the synset endpoint and the items of those responses are
    gathered into a single list.

    Args:
        word: Term to look up; appended verbatim to the search URL.
        timeout: Seconds to wait on each HTTP request before giving up.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The collected strings without repeats, in first-seen order. An empty
        list is returned when the search yields nothing or when any synset
        request yields nothing.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            a request exceeds ``timeout``.
    """
    url_search = "http://wordnet.pt/api/por/search/"
    url_synset = "http://wordnet.pt/api/por/synset/"

    search_reply = requests.get(url_search + word, timeout=timeout)
    matches = process_content(search_reply.content)
    if not matches:
        return []

    collected = []
    for match in matches:
        synset_key = encode_aux(str(match))
        synset_reply = requests.get(url_synset + synset_key, timeout=timeout)
        members = process_content(synset_reply.content)
        if not members:
            # NOTE(review): one empty synset discards everything gathered so
            # far. Kept as in the original; confirm whether skipping just
            # this synset was the intent.
            return []
        collected.extend(encode_aux(str(member)) for member in members)

    return remove_duplicates(collected)
if __name__ == "__main__":
    # Demo lookup. Guarded so that importing this module does not trigger a
    # network request or print anything; running the file directly still does.
    print(synsets("camisola"))
##Testes com encoding
# x = 'mo\xc3\x83\xc2\xa7a'
# print(x.encode('ascii').decode('iso-8859-1'))