Skip to content

Commit 9fa0d61

Browse files
authored
#77 Top_k feature Added (#115)
Co-authored-by: Karanveer Singh Sidhu <>
1 parent 010c04b commit 9fa0d61

File tree

3 files changed

+27
-8
lines changed

3 files changed

+27
-8
lines changed

src/config.yaml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,5 +39,4 @@ llmargs:
3939
temperature: 0.5
4040
max_tokens: 1024
4141
output: ../output/AspectAdded/semeval-agg/aspectAdded.pkl
42-
43-
42+
top_k_aspects: 1

src/llm/aspect_extraction_pipeline.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ class LLMReviewProcessor:
1717
def __init__(self, cfg: DictConfig):
1818
self.cfg = cfg
1919
self.llm_handler = self._init_llm_handler()
20-
self.prompt_builder = PromptBuilder()
20+
self.prompt_builder = PromptBuilder(top_k=cfg.llmargs.top_k_aspects)
2121

2222
def _init_llm_handler(self):
2323
config = LLMconfig(
@@ -39,7 +39,6 @@ def find_aspect_indices(aspect: str, sentence_tokens) :
3939
for i in range(len(tokens) - len(aspect_tokens) + 1):
4040
if tokens[i:i + len(aspect_tokens)] == aspect_tokens: return list(range(i, i + len(aspect_tokens)))
4141

42-
4342
return -1
4443

4544
def process_reviews(self, reviews: list):
@@ -49,13 +48,32 @@ def process_reviews(self, reviews: list):
4948
if sample_review.get('implicit', [False])[0] is not True: continue
5049

5150
prompt = self.prompt_builder.build_prompt(sample_review)
52-
response = self.llm_handler.get_response(prompt)
53-
matches = re.findall(r'\{.*?\}', response, re.DOTALL)
51+
52+
max_retries = 5
53+
valid_json_found = False
54+
matches = []
55+
56+
for attempt in range(max_retries):
57+
response = self.llm_handler.get_response(prompt)
58+
matches = re.findall(r'\{.*?\}', response, re.DOTALL)
59+
60+
for json_str in matches:
61+
try:
62+
aspect_data = json.loads(json_str)
63+
if "aspect" in aspect_data and aspect_data["aspect"]:
64+
valid_json_found = True
65+
break
66+
except json.JSONDecodeError:
67+
continue
68+
69+
if valid_json_found:
70+
break
71+
else:
72+
print(f"Invalid or no valid JSON with 'aspect' found. Attempt {attempt + 1} of {max_retries}")
5473

5574
if not matches:
5675
print("No JSON object found in response")
5776
continue
58-
5977
all_aspects = []
6078
seen_aspects = set()
6179
tokens = [word.strip().lower() for sentences in sample_review["sentences"] for word in sentences]

src/llm/prompt_builder.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,18 @@
11

22
class PromptBuilder:
3-
def __init__(self, task_description=None):
3+
def __init__(self, task_description=None, top_k=1):
44
self.task_description = task_description or (
55
"Identify the latent aspect targeted by the sentiment in the review. "
66
"If the aspect is explicitly mentioned, return its index; if it's implicit, return the inferred aspect and use index -1."
77
)
8+
self.top_k = top_k
89

910
def build_prompt(self, review_entry: dict) -> str:
1011
review_text = ' '.join(review_entry['sentences'][0])
1112
prompt = (
1213
f"Review: \"{review_text}\"\n"
1314
f"Task: {self.task_description}\n"
15+
f"Return exactly the top {self.top_k} aspect(s) that best represent the sentiment in this review.\n"
1416
f"Output Format: {{\"aspect\": \"<aspect_name>\", \"index\": <index_list or -1>}}"
1517
)
1618

0 commit comments

Comments
 (0)