Skip to content
This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit f6dfc8c

Browse files
authored
fixed import (#148)
* fixed import * merged utils into sentiment sol * deleted duplicate import
1 parent 6f42da2 commit f6dfc8c

File tree

2 files changed

+57
-75
lines changed

2 files changed

+57
-75
lines changed

solutions/absa_solution/sentiment_solution.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,17 @@
2222
import numpy as np
2323
import pandas as pd
2424
from tqdm import tqdm
25+
import csv
26+
import re
27+
from abc import abstractmethod
2528

2629
from nlp_architect import LIBRARY_OUT
2730
from nlp_architect.common.core_nlp_doc import CoreNLPDoc
2831
from nlp_architect.models.absa.inference.data_types import (
2932
TermType,
3033
SentimentDocEncoder,
3134
SentimentDoc,
35+
SentimentSentence,
3236
)
3337
from nlp_architect.models.absa.inference.inference import SentimentInference
3438
from nlp_architect.models.absa.utils import load_opinion_lex
@@ -40,11 +44,62 @@
4044
line_count,
4145
)
4246

43-
from .utils import Anonymiser, _ui_format
44-
4547
SENTIMENT_OUT = LIBRARY_OUT / "absa_solution"
4648

4749

50+
class Anonymiser:
    """Base interface for anonymiser algorithms, intended for privacy keeping.

    Concrete anonymisers override :meth:`run`. Note that ``run`` is only
    *marked* abstract: this class does not use ``abc.ABCMeta``, so the marker
    is not enforced and the base class can still be instantiated.
    """

    @abstractmethod
    def run(self, text):
        """Anonymise ``text``; the base implementation does nothing and returns ``None``."""
        return None
56+
57+
58+
class TweetAnonymiser(Anonymiser):
    """Anonymiser for tweets which uses a lexicon for simple string replacements.

    The lexicon is a CSV file read by ``_init_entity_dict``: the first cell of
    each row is the anonymised replacement, and the remaining non-empty cells
    are the entity strings that get replaced by it.
    """

    def __init__(self, lexicon_path):
        """Load the replacement lexicon from the CSV file at ``lexicon_path``."""
        self.entity_dict = self._init_entity_dict(lexicon_path)

    @staticmethod
    def _init_entity_dict(lexicon_path):
        """Read the lexicon CSV into ``{anonymised: [entity, ...]}``.

        Args:
            lexicon_path: Path of a UTF-8 encoded CSV file.

        Returns:
            dict: Maps the first cell of every non-blank row to the list of
            its remaining non-empty cells.
        """
        ret = {}
        # newline="" lets the csv module do its own newline handling, as the
        # csv documentation requires for files passed to csv.reader.
        with open(lexicon_path, encoding="utf-8", newline="") as f:
            for row in csv.reader(f):
                if not row:
                    # csv.reader yields [] for blank lines; row[0] would raise.
                    continue
                ret[row[0]] = [cell for cell in row[1:] if cell]
        return ret

    def run(self, text):
        """Return ``text`` with lexicon entities and unknown mentions anonymised.

        Every entity string is replaced, case-insensitively, by its anonymised
        form. Afterwards each whitespace-separated word starting with ``@``
        whose remainder is not an anonymised key becomes ``@other_entity``.
        Words are re-joined with single spaces.

        Args:
            text (str): The text to anonymise.

        Returns:
            str: The anonymised text.
        """
        for anonymised, entities in self.entity_dict.items():
            for entity in entities:
                text = re.sub(
                    # Entities are plain strings, not patterns: escape them so
                    # characters such as "+", "." or "(" match literally.
                    re.escape(entity),
                    # A callable replacement is inserted verbatim, so any
                    # backslash in the anonymised form is not parsed as a
                    # regex template escape.
                    lambda _match, replacement=anonymised: replacement,
                    text,
                    flags=re.IGNORECASE,
                )
        return " ".join(
            "@other_entity"
            if (word.startswith("@") and word[1:] not in self.entity_dict)
            else word
            for word in text.split()
        )
85+
86+
87+
def _ui_format(sent: SentimentSentence, doc: SentimentDoc) -> str:
88+
"""Get sentence as HTML with 4 classes: aspects, opinions, negations and intensifiers."""
89+
text = doc.doc_text[sent.start : sent.end + 1]
90+
seen = set()
91+
for term in sorted([t for e in sent.events for t in e], key=lambda t: t.start)[::-1]:
92+
if term.start not in seen:
93+
seen.add(term.start)
94+
start = term.start - sent.start
95+
end = start + term.len
96+
label = term.type.value + "_" + term.polarity.value
97+
text = "".join(
98+
(text[:start], '<span class="', label, '">', text[start:end], "</span>", text[end:])
99+
)
100+
return text
101+
102+
48103
class SentimentSolution(object):
49104
"""Main class for executing Sentiment Solution pipeline.
50105

solutions/absa_solution/utils.py

Lines changed: 0 additions & 73 deletions
This file was deleted.

0 commit comments

Comments
 (0)