1111The module is particularly useful for working with Likert scale responses and their
1212embeddings, providing tools to analyze and transform the underlying probability
1313distributions.
14-
15- Examples
16- --------
17- >>> x = np.arange(1,6)
18- >>> pdf = np.array([0.1,0.15,0.05,0.2,0.5])
19- >>> real_mean = 3.0
20- >>> T, scaled_pdf = get_optimal_temperature_mean(x, pdf, real_mean)
2114"""
2215
2316import numpy as np
24- from scipy .optimize import minimize
25-
26-
def cos_to_pdf(cos):
    """
    Convert cosine similarities to a probability density function (PDF).

    The smallest similarity is shifted to zero and the shifted values are
    divided by their sum, so the entry with the lowest similarity always
    receives probability 0.

    Parameters
    ----------
    cos : array_like
        Array of cosine similarity values

    Returns
    -------
    numpy.ndarray
        Normalized PDF where all values sum to 1. If every similarity is
        identical, a uniform PDF is returned.

    Raises
    ------
    ValueError
        If ``cos`` is empty (the minimum of an empty array is undefined).
    """
    shifted = np.asarray(cos, dtype=float)
    shifted = shifted - shifted.min()
    total = shifted.sum()
    if total == 0:
        # All similarities are equal: dividing would give 0/0 = NaN, so
        # express "no preference" as a uniform distribution instead.
        return np.full(shifted.shape, 1.0 / shifted.size)
    return shifted / total
4317
4418
def cos_sim(emb1, emb2):
    """
    Calculate the cosine similarity of two embeddings, rescaled to [0, 1].

    The raw cosine similarity ``c`` (in [-1, 1]) is mapped to ``(1 + c) / 2``.

    Parameters
    ----------
    emb1 : array_like
        First embedding vector
    emb2 : array_like
        Second embedding vector

    Returns
    -------
    float
        Rescaled cosine similarity score between 0 and 1
    """
    # Coerce up front so plain lists/tuples work as the array_like contract
    # promises; the underlying helper relies on ndarray methods.
    first = np.asarray(emb1)
    second = np.asarray(emb2)
    return (1 + cos_sim_pdf(first, second)) / 2
62-
63-
64- def scale_pdf (pdf , temperature , max_temp = 10 ):
19+ def scale_pdf (pdf , temperature , max_temp = np .inf ):
6520 """
6621 Scale a PDF using temperature scaling.
6722
@@ -72,7 +27,7 @@ def scale_pdf(pdf, temperature, max_temp=10):
7227 temperature : float
7328 Temperature parameter for scaling (0 to max_temp)
7429 max_temp : float, optional
75- Maximum temperature value, by default 10
30+ Maximum temperature value, by default np.inf
7631
7732 Returns
7833 -------
@@ -99,187 +54,6 @@ def scale_pdf(pdf, temperature, max_temp=10):
9954 return hist / hist .sum ()
10055
10156
def scale_pdf_no_max_temp(pdf, temperature):
    """
    Temperature-scale a PDF without an upper temperature limit.

    Equivalent to ``scale_pdf(pdf, temperature, max_temp=np.inf)``.
    """
    unbounded = np.inf
    return scale_pdf(pdf, temperature, max_temp=unbounded)
105-
106-
def cos_sim_pdf(pdf1, pdf2):
    """
    Calculate cosine similarity between two PDFs.

    Parameters
    ----------
    pdf1 : array_like
        First probability density function
    pdf2 : array_like
        Second probability density function

    Returns
    -------
    float
        Cosine similarity between the PDFs, i.e. their dot product divided
        by the product of their Euclidean norms
    """
    # np.asarray lets lists and tuples through, matching the documented
    # array_like contract (a bare list has no ``.dot`` method).
    first = np.asarray(pdf1)
    second = np.asarray(pdf2)
    return first.dot(second) / np.linalg.norm(first) / np.linalg.norm(second)
124-
125-
def KS_sim_pdf(pdf1, pdf2):
    """
    Calculate Kolmogorov-Smirnov similarity between two PDFs.

    The similarity is one minus the largest absolute gap between the two
    cumulative sums.

    Parameters
    ----------
    pdf1 : array_like
        First probability density function
    pdf2 : array_like
        Second probability density function

    Returns
    -------
    float
        KS similarity score between 0 and 1
    """
    cdf_first = np.cumsum(pdf1)
    cdf_second = np.cumsum(pdf2)
    largest_gap = np.max(np.abs(cdf_first - cdf_second))
    return 1 - largest_gap
143-
144-
def pdf_moment(pdf, x, m):
    """
    Calculate the m-th moment of a PDF.

    Computed as the dot product of ``pdf`` with ``x ** m``.

    Parameters
    ----------
    pdf : array_like
        Probability density function
    x : array_like
        Values corresponding to the PDF
    m : int
        Order of the moment to calculate

    Returns
    -------
    float
        The m-th moment of the PDF
    """
    # Coerce both inputs: plain lists support neither ``.dot`` nor
    # element-wise ``**``, yet the contract advertises array_like.
    weights = np.asarray(pdf)
    values = np.asarray(x)
    return weights.dot(values ** m)
164-
165-
def mean(pdf, x):
    """
    Calculate the mean of a PDF.

    This is the first moment of the distribution.

    Parameters
    ----------
    pdf : array_like
        Probability density function
    x : array_like
        Values corresponding to the PDF

    Returns
    -------
    float
        Mean value of the PDF
    """
    first_moment = pdf_moment(pdf, x, 1)
    return first_moment
183-
184-
def var(pdf, x):
    """
    Calculate the variance of a PDF.

    Uses the identity ``Var = E[x^2] - E[x]^2``.

    Parameters
    ----------
    pdf : array_like
        Probability density function
    x : array_like
        Values corresponding to the PDF

    Returns
    -------
    float
        Variance of the PDF
    """
    second_moment = pdf_moment(pdf, x, m=2)
    expectation = mean(pdf, x)
    return second_moment - expectation ** 2
204-
205-
def std(pdf, x):
    """
    Calculate the standard deviation of a PDF.

    This is the square root of the variance.

    Parameters
    ----------
    pdf : array_like
        Probability density function
    x : array_like
        Values corresponding to the PDF

    Returns
    -------
    float
        Standard deviation of the PDF
    """
    variance = var(pdf, x)
    return np.sqrt(variance)
223-
224-
def get_optimal_temperature_mean(x, pdf, real_mean):
    """
    Find the temperature whose scaled PDF has a mean closest to a target mean.

    The squared difference between the scaled PDF's mean and ``real_mean``
    is minimized over temperatures in [0, 10], starting from 1.0.

    Parameters
    ----------
    x : array_like
        Values corresponding to the PDF
    pdf : array_like
        Input probability density function
    real_mean : float
        Target mean value

    Returns
    -------
    tuple
        (optimal_temperature, scaled_pdf)
    """
    initial_temperature = 1.0
    temperature_bounds = [(0, 10.0)]

    def _squared_mean_gap(temperature):
        rescaled = scale_pdf(pdf, temperature)
        return (mean(rescaled, x) - real_mean) ** 2

    fit = minimize(_squared_mean_gap, initial_temperature, bounds=temperature_bounds)

    best_temperature = fit.x[0]
    return best_temperature, scale_pdf(pdf, best_temperature)
253-
254-
def get_optimal_temperature_KS_sim(pdf, real_pdf):
    """
    Find the temperature that maximizes KS similarity to a target PDF.

    The negated KS similarity between the scaled PDF and ``real_pdf`` is
    minimized over temperatures in [0, 10], starting from 1.0.

    Parameters
    ----------
    pdf : array_like
        Input probability density function
    real_pdf : array_like
        Target probability density function

    Returns
    -------
    tuple
        (optimal_temperature, scaled_pdf)
    """
    initial_temperature = 1.0
    temperature_bounds = [(0, 10.0)]

    def _negative_similarity(temperature):
        rescaled = scale_pdf(pdf, temperature)
        return -KS_sim_pdf(rescaled, real_pdf)

    fit = minimize(_negative_similarity, initial_temperature, bounds=temperature_bounds)

    best_temperature = fit.x[0]
    return best_temperature, scale_pdf(pdf, best_temperature)
281-
282-
28357def response_embeddings_to_pdf (matrix_responses , matrix_likert_sentences ):
28458 """
28559 Convert response embeddings and Likert sentence embeddings to a PDF.
@@ -314,13 +88,3 @@ def response_embeddings_to_pdf(matrix_responses, matrix_likert_sentences):
31488 pdf = cos / sum_per_row [:, None ]
31589
31690 return pdf
317-
318-
if __name__ == "__main__":
    # Example usage with test data
    x = np.arange(1, 6)
    pdf = np.array([0.1, 0.15, 0.05, 0.2, 0.5])
    realpdf = np.array([0.1, 0.15, 0.5, 0.15, 0.1])
    real_mean = 3.0
    print(get_optimal_temperature_mean(x, pdf, real_mean))
    # The KS fit compares against a target distribution, so pass the target
    # PDF here rather than the scalar target mean.
    print(get_optimal_temperature_KS_sim(pdf, realpdf))
0 commit comments