Skip to content

Commit 8e6834e

Browse files
committed
⌫ Remove fluff
1 parent 286cf03 commit 8e6834e

File tree

5 files changed

+5
-424
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ The ESR methodology works by:
6767

6868
- `EmbeddingsRater`: Main class implementing the ESR methodology
6969
- `response_embeddings_to_pdf()`: Core function for similarity-to-probability conversion
70-
- `scale_pdf()` and `scale_pdf_no_max_temp()`: Temperature scaling functions
70+
- `scale_pdf()`: Temperature scaling function
7171

7272
## Citation
7373

embeddings_similarity_rating/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
from beartype.claw import beartype_this_package
1212

13-
from .compute import response_embeddings_to_pdf, scale_pdf, scale_pdf_no_max_temp
13+
from .compute import response_embeddings_to_pdf, scale_pdf
1414
from .embeddings_rater import EmbeddingsRater
1515

1616
__version__ = "1.0.0"
@@ -20,7 +20,6 @@
2020
"EmbeddingsRater",
2121
"response_embeddings_to_pdf",
2222
"scale_pdf",
23-
"scale_pdf_no_max_temp",
2423
]
2524

2625
beartype_this_package()

embeddings_similarity_rating/compute.py

Lines changed: 2 additions & 238 deletions
Original file line number | Diff line number | Diff line change
@@ -11,57 +11,12 @@
1111
The module is particularly useful for working with Likert scale responses and their
1212
embeddings, providing tools to analyze and transform the underlying probability
1313
distributions.
14-
15-
Examples
16-
--------
17-
>>> x = np.arange(1,6)
18-
>>> pdf = np.array([0.1,0.15,0.05,0.2,0.5])
19-
>>> real_mean = 3.0
20-
>>> T, scaled_pdf = get_optimal_temperature_mean(x, pdf, real_mean)
2114
"""
2215

2316
import numpy as np
24-
from scipy.optimize import minimize
25-
26-
27-
def cos_to_pdf(cos):
28-
"""
29-
Convert cosine similarities to a probability density function (PDF).
30-
31-
Parameters
32-
----------
33-
cos : array_like
34-
Array of cosine similarity values
35-
36-
Returns
37-
-------
38-
numpy.ndarray
39-
Normalized PDF where all values sum to 1
40-
"""
41-
hist = np.array(cos) - np.min(cos)
42-
return hist / hist.sum()
4317

4418

45-
def cos_sim(emb1, emb2):
46-
"""
47-
Calculate cosine similarity between two embeddings.
48-
49-
Parameters
50-
----------
51-
emb1 : array_like
52-
First embedding vector
53-
emb2 : array_like
54-
Second embedding vector
55-
56-
Returns
57-
-------
58-
float
59-
Cosine similarity score between 0 and 1
60-
"""
61-
return (1 + cos_sim_pdf(emb1, emb2)) / 2
62-
63-
64-
def scale_pdf(pdf, temperature, max_temp=10):
19+
def scale_pdf(pdf, temperature, max_temp=np.inf):
6520
"""
6621
Scale a PDF using temperature scaling.
6722
@@ -72,7 +27,7 @@ def scale_pdf(pdf, temperature, max_temp=10):
7227
temperature : float
7328
Temperature parameter for scaling (0 to max_temp)
7429
max_temp : float, optional
75-
Maximum temperature value, by default 10
30+
Maximum temperature value, by default np.inf
7631
7732
Returns
7833
-------
@@ -99,187 +54,6 @@ def scale_pdf(pdf, temperature, max_temp=10):
9954
return hist / hist.sum()
10055

10156

102-
def scale_pdf_no_max_temp(pdf, temperature):
103-
"""Calls ``scale_pdf(pdf, temperature, max_temp=np.inf)``"""
104-
return scale_pdf(pdf, temperature, max_temp=np.inf)
105-
106-
107-
def cos_sim_pdf(pdf1, pdf2):
108-
"""
109-
Calculate cosine similarity between two PDFs.
110-
111-
Parameters
112-
----------
113-
pdf1 : array_like
114-
First probability density function
115-
pdf2 : array_like
116-
Second probability density function
117-
118-
Returns
119-
-------
120-
float
121-
Cosine similarity between the PDFs
122-
"""
123-
return pdf1.dot(pdf2) / np.linalg.norm(pdf1) / np.linalg.norm(pdf2)
124-
125-
126-
def KS_sim_pdf(pdf1, pdf2):
127-
"""
128-
Calculate Kolmogorov-Smirnov similarity between two PDFs.
129-
130-
Parameters
131-
----------
132-
pdf1 : array_like
133-
First probability density function
134-
pdf2 : array_like
135-
Second probability density function
136-
137-
Returns
138-
-------
139-
float
140-
KS similarity score between 0 and 1
141-
"""
142-
return 1 - np.max(np.abs(np.cumsum(pdf1) - np.cumsum(pdf2)))
143-
144-
145-
def pdf_moment(pdf, x, m):
146-
"""
147-
Calculate the m-th moment of a PDF.
148-
149-
Parameters
150-
----------
151-
pdf : array_like
152-
Probability density function
153-
x : array_like
154-
Values corresponding to the PDF
155-
m : int
156-
Order of the moment to calculate
157-
158-
Returns
159-
-------
160-
float
161-
The m-th moment of the PDF
162-
"""
163-
return pdf.dot(x**m)
164-
165-
166-
def mean(pdf, x):
167-
"""
168-
Calculate the mean of a PDF.
169-
170-
Parameters
171-
----------
172-
pdf : array_like
173-
Probability density function
174-
x : array_like
175-
Values corresponding to the PDF
176-
177-
Returns
178-
-------
179-
float
180-
Mean value of the PDF
181-
"""
182-
return pdf_moment(pdf, x, m=1)
183-
184-
185-
def var(pdf, x):
186-
"""
187-
Calculate the variance of a PDF.
188-
189-
Parameters
190-
----------
191-
pdf : array_like
192-
Probability density function
193-
x : array_like
194-
Values corresponding to the PDF
195-
196-
Returns
197-
-------
198-
float
199-
Variance of the PDF
200-
"""
201-
_x_ = mean(pdf, x)
202-
_x2_ = pdf_moment(pdf, x, m=2)
203-
return _x2_ - _x_**2
204-
205-
206-
def std(pdf, x):
207-
"""
208-
Calculate the standard deviation of a PDF.
209-
210-
Parameters
211-
----------
212-
pdf : array_like
213-
Probability density function
214-
x : array_like
215-
Values corresponding to the PDF
216-
217-
Returns
218-
-------
219-
float
220-
Standard deviation of the PDF
221-
"""
222-
return np.sqrt(var(pdf, x))
223-
224-
225-
def get_optimal_temperature_mean(x, pdf, real_mean):
226-
"""
227-
Find the optimal temperature that matches the mean of a scaled PDF to a target mean.
228-
229-
Parameters
230-
----------
231-
x : array_like
232-
Values corresponding to the PDF
233-
pdf : array_like
234-
Input probability density function
235-
real_mean : float
236-
Target mean value
237-
238-
Returns
239-
-------
240-
tuple
241-
(optimal_temperature, scaled_pdf)
242-
"""
243-
244-
def _obj(T):
245-
return (mean(scale_pdf(pdf, T), x) - real_mean) ** 2
246-
247-
T0 = 1.0
248-
res = minimize(_obj, T0, bounds=[(0, 10.0)])
249-
250-
T = res.x[0]
251-
pdf = scale_pdf(pdf, T)
252-
return T, pdf
253-
254-
255-
def get_optimal_temperature_KS_sim(pdf, real_pdf):
256-
"""
257-
Find the optimal temperature that maximizes KS similarity between scaled PDF and target PDF.
258-
259-
Parameters
260-
----------
261-
pdf : array_like
262-
Input probability density function
263-
real_pdf : array_like
264-
Target probability density function
265-
266-
Returns
267-
-------
268-
tuple
269-
(optimal_temperature, scaled_pdf)
270-
"""
271-
272-
def _obj(T):
273-
return -KS_sim_pdf(scale_pdf(pdf, T), real_pdf)
274-
275-
T0 = 1.0
276-
res = minimize(_obj, T0, bounds=[(0, 10.0)])
277-
278-
T = res.x[0]
279-
pdf = scale_pdf(pdf, T)
280-
return T, pdf
281-
282-
28357
def response_embeddings_to_pdf(matrix_responses, matrix_likert_sentences):
28458
"""
28559
Convert response embeddings and Likert sentence embeddings to a PDF.
@@ -314,13 +88,3 @@ def response_embeddings_to_pdf(matrix_responses, matrix_likert_sentences):
31488
pdf = cos / sum_per_row[:, None]
31589

31690
return pdf
317-
318-
319-
if __name__ == "__main__":
320-
# Example usage with test data
321-
x = np.arange(1, 6)
322-
pdf = np.array([0.1, 0.15, 0.05, 0.2, 0.5])
323-
realpdf = np.array([0.1, 0.15, 0.5, 0.15, 0.1])
324-
real_mean = 3.0
325-
print(get_optimal_temperature_mean(x, pdf, real_mean))
326-
print(get_optimal_temperature_KS_sim(pdf, real_mean))

embeddings_similarity_rating/embeddings_rater.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -161,10 +161,7 @@ def get_response_pdfs(self, reference_set_id, llm_response_matrix, temperature=1
161161

162162
if temperature != 1.0:
163163
llm_response_pdfs = np.array(
164-
[
165-
compute.scale_pdf_no_max_temp(_pdf, temperature)
166-
for _pdf in llm_response_pdfs
167-
]
164+
[compute.scale_pdf(_pdf, temperature) for _pdf in llm_response_pdfs]
168165
)
169166

170167
return llm_response_pdfs

0 commit comments

Comments
 (0)