Skip to content

Commit 1296021

Browse files
sentence transformers and tests
1 parent 77eaef7 commit 1296021

File tree

2 files changed

+125
-6
lines changed

2 files changed

+125
-6
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Copyright 2025 Telefónica Innovación Digital, S.L.
4+
This file is part of Toolium.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
"""
18+
19+
import os
20+
import json
21+
import pytest
22+
23+
from toolium.driver_wrappers_pool import DriverWrappersPool
24+
from toolium.utils.ai_utils.text_analysis import (get_text_criteria_analysis_openai,
25+
get_text_criteria_analysis_sentence_transformers)
26+
27+
28+
def configure_default_openai_model():
    """
    Configure OpenAI model used in unit tests

    Makes sure the configuration of the default driver wrapper has an 'AI' section and sets
    its 'openai_model' option to 'gpt-4o-mini'.
    """
    config = DriverWrappersPool.get_default_wrapper().config
    # Create the section only when it is missing, instead of swallowing every exception,
    # so that unrelated configuration errors are not silently hidden
    if not config.has_section('AI'):
        config.add_section('AI')
    config.set('AI', 'openai_model', 'gpt-4o-mini')
38+
39+
40+
get_analysis_examples = (
41+
('How are you today?', ["is a greeting phrase", "is a question"], 0.7, 1),
42+
('Today is sunny', ["is an affirmation", "talks about the weather"], 0.7, 1),
43+
('I love programming', ["expresses a positive sentiment"], 0.7, 1),
44+
('How are you today?', ["is an affirmation", "talks about the weather"], 0.0, 0.2),
45+
('Today is sunny', ["is a greeting phrase", "is a question"], 0.0, 0.2),
46+
('I love programming', ["is a greeting phrase", "is a question"], 0.0, 0.2),
47+
)
48+
49+
50+
@pytest.mark.skipif(os.getenv("AZURE_OPENAI_API_KEY") is None,
                    reason="AZURE_OPENAI_API_KEY environment variable not set")
@pytest.mark.parametrize('input_text, features_list, expected_low, expected_high', get_analysis_examples)
def test_get_text_analysis(input_text, features_list, expected_low, expected_high):
    """
    Check that the overall match returned by the OpenAI analysis is inside the expected range

    The analysis is requested through Azure and its response is parsed as JSON.
    """
    raw_response = get_text_criteria_analysis_openai(input_text, features_list, azure=True)
    analysis = json.loads(raw_response)
    overall = analysis['overall_match']
    assert expected_low <= overall <= expected_high, f"Overall match {overall} not in range"
57+
58+
59+
extra_task = """
60+
Additional task:
61+
62+
Extract all verbs from the input text and add them to the JSON under data.verbs.
63+
64+
Rules:
65+
- Use the same language as the input text.
66+
- Return verbs in their base/infinitive form when possible.
67+
- Do not repeat verbs (no duplicates).
68+
- Preserve the order in which they first appear in the text.
69+
- Verbs should be in this base/infinitive form.
70+
71+
The data field must include:
72+
"data": {
73+
"verbs": [ "<verb1>", "<verb2>", ... ]
74+
}
75+
If no verbs are found, set "verbs" to an empty array: "verbs": [].
76+
"""
77+
78+
get_extra_examples = (
79+
('How are you today?', ["is a greeting phrase", "is a question"], ['be']),
80+
('I wrote a letter', ["is an affirmation", "talks about the weather"], ['write']),
81+
('I have to go', ["expresses a positive sentiment"], ['have', 'go']),
82+
('I went to Madrid', ["is an affirmation", "talks about the weather"], ['go']),
83+
('Oops I did it again', ["is a greeting phrase", "is a question"], ['do'])
84+
)
85+
86+
@pytest.mark.skipif(os.getenv("AZURE_OPENAI_API_KEY") is None,
                    reason="AZURE_OPENAI_API_KEY environment variable not set")
@pytest.mark.parametrize('input_text, features_list, verb_list', get_extra_examples)
def test_get_text_analysis_extra_features(input_text, features_list, verb_list):
    """
    Check that the extra task output is returned under data.verbs of the OpenAI analysis

    The analysis is requested through Azure with extra_task as additional instructions and
    its response is parsed as JSON.
    """
    raw_response = get_text_criteria_analysis_openai(
        input_text, features_list, azure=True, extra_tasks=extra_task)
    extracted_verbs = json.loads(raw_response)['data']['verbs']
    assert extracted_verbs == verb_list
93+
94+
95+
examples_sentence_transformers = (
96+
('How are you today?', ["hello!", "What's up"], 0.4, 1),
97+
('Today is not sunny', ["it's raining"], 0.4, 1),
98+
('I love programming', ["I like code", "I love to cook"], 0.4, 1),
99+
('How are you today?', ["it's raining", "this text is an affirmation"], 0.0, 0.3),
100+
('Today is sunny', ["I like code", "I love to cook"], 0.0, 0.3),
101+
('I love programming', ["hello!", "What's up"], 0.0, 0.3),
102+
)
103+
104+
105+
@pytest.mark.parametrize('input_text, features_list, expected_low, expected_high', examples_sentence_transformers)
def test_get_text_analysis_sentence_transformers(input_text, features_list, expected_low, expected_high):
    """
    Check that the overall match returned by the Sentence Transformers analysis is inside the expected range
    """
    # Sentence Transformers is an optional dependency: skip the test instead of erroring when it is missing
    pytest.importorskip('sentence_transformers')
    similarity = get_text_criteria_analysis_sentence_transformers(input_text, features_list)
    overall = similarity['overall_match']
    assert expected_low <= overall <= expected_high, f"Overall match {overall} not in range"

toolium/utils/ai_utils/text_analysis.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def build_system_message(characteristics):
4646
4747
Tasks:
4848
1) For EACH characteristic, decide how well the text satisfies it on a scale from 0.0 (does not satisfy it at all) to 1.0 (perfectly satisfies it). Consider style, tone and content when relevant.
49-
2) Only for each low scored characteristic (<=0.2), output:
49+
2) ONLY for each low scored characteristic (<=0.2), output:
5050
- "name": the exact characteristic name as listed above.
5151
- "score": a float between 0.0 and 0.2.
5252
3) Compute an overall score "overall_match" between 0.0 and 1.0 that summarizes how well the text matches the whole set. It does not have to be a simple arithmetic mean, but must still be in [0.0, 1.0].
@@ -75,6 +75,7 @@ def build_system_message(characteristics):
7575
}}
7676
7777
Constraints:
78+
- Do NOT include scores for high valued (>0.2) features in the features list.
7879
- The "data" field must ALWAYS be present. If there are no extra sections, it MUST be: "data": {{}}.
7980
- Use a dot as decimal separator (e.g. 0.75, not 0,75).
8081
- Use at most 2 decimal places for all scores.
@@ -109,14 +110,17 @@ def get_text_criteria_analysis_openai(text_input, target_features, extra_tasks=N
109110
msg.append(task)
110111
else:
111112
msg.append(extra_tasks)
112-
return openai_request(system_message, text_input, model_name, azure, **kwargs)
113+
return openai_request(msg, text_input, model_name, azure, **kwargs)
113114

114115

115116
def get_text_criteria_analysis_sentence_transformers(text_input, target_features, extra_tasks=None,
116117
model_name=None, azure=True, **kwargs):
117118
"""
118-
Get text criteria analysis using Sentence Transformers. To analyze how well a given text
119-
matches a set of target characteristics.
119+
Get text criteria analysis using Sentence Transformers. Sentence Transformers works better using examples
120+
that are semantically similar, so this method is more suitable for evaluating characteristics like
121+
"is a greeting phrase", "talks about the weather", etc.
122+
The response is a structured JSON object with overall match score, individual feature scores,
123+
and additional data sections.
120124
121125
:param text_input: text to analyze
122126
:param target_features: list of target characteristics to evaluate
@@ -128,6 +132,12 @@ def get_text_criteria_analysis_sentence_transformers(text_input, target_features
128132
if SentenceTransformer is None:
129133
raise ImportError("Sentence Transformers is not installed. Please run 'pip install toolium[ai]'"
130134
" to use Sentence Transformers features")
135+
136+
def similarity_to_score(cos_sim):
    """
    Map a cosine similarity to a match score between 0.0 and 1.0

    :param cos_sim: cosine similarity value
    :returns: 0.0 for similarities up to 0.1, otherwise the similarity divided by 0.7 and capped at 1.0
    """
    if cos_sim <= 0.1:
        return 0.0
    # Without the cap, similarities above 0.7 would produce scores greater than 1.0
    return min(1.0, cos_sim / 0.7)
140+
131141
config = DriverWrappersPool.get_default_wrapper().config
132142
model_name = model_name or config.get_optional('AI', 'sentence_transformers_model', 'all-mpnet-base-v2')
133143
model = SentenceTransformer(model_name, **kwargs)
@@ -141,10 +151,10 @@ def get_text_criteria_analysis_sentence_transformers(text_input, target_features
141151
# Generate contracted results
142152
for f, sim in zip(target_features, sims):
143153
# Convert cosine similarity to a feature score (see similarity_to_score)
144-
score = (sim + 1.0) / 2.0
154+
score = similarity_to_score(sim)
145155
results.append({
146156
"name": f,
147-
"score": round(score, 2),
157+
"score": round(score, 2)
148158
})
149159

150160
# overall score as average of feature scores

0 commit comments

Comments
 (0)