44This module provides functionality to:
55- Validate reference sentence data structure
66- Convert LLM response embeddings to probability distributions
7- - Calculate survey response PDFs using different reference sets
7+ - Calculate survey response PMFs using different reference sets
88- Compare responses against mean or specific reference sets
99
1010The module is particularly useful for analyzing Likert scale responses from LLMs
@@ -88,10 +88,10 @@ class EmbeddingsRater:
8888 >>> # Initialize rater
8989 >>> rater = EmbeddingsRater(df, embeddings_column='embedding_small')
9090 >>>
91- >>> # Get PDFs for some LLM responses
91+ >>> # Get PMFs for some LLM responses
9292 >>> llm_responses = np.random.rand(5, 384) # 5 responses, each with 384-dim embedding
93- >>> pdfs = rater.get_response_pdfs ('set1', llm_responses)
94- >>> survey_pdf = rater.get_survey_response_pdf(pdfs )
93+ >>> pmfs = rater.get_response_pmfs ('set1', llm_responses)
94+ >>> survey_pmf = rater.get_survey_response_pmf(pmfs )
9595 """
9696
9797 def __init__ (
@@ -124,9 +124,11 @@ def __init__(
124124 M = np .array (this_set [embeddings_column ].to_list ()).T
125125 self .reference_matrices [sentence_set ] = M
126126
127- def get_response_pdfs (self , reference_set_id , llm_response_matrix , temperature = 1.0 ):
127+ def get_response_pmfs (
128+ self , reference_set_id , llm_response_matrix , temperature = 1.0 , epsilon = 0.0
129+ ):
128130 """
129- Convert LLM response embeddings to PDFs using specified reference set.
131+ Convert LLM response embeddings to PMFs using specified reference set.
130132
131133 Parameters
132134 ----------
@@ -136,57 +138,60 @@ def get_response_pdfs(self, reference_set_id, llm_response_matrix, temperature=1
136138 Matrix of LLM response embeddings
137139 Shape: (n_responses, n_dimensions)
138140 temperature : float
139- Get scaled pdf With temperature T:
141+ Get scaled PMF with temperature T:
140142 ``p_new[i] ~ p_old[i]^(1/T)``.
143+ epsilon : float, optional
144+ Small regularization parameter to prevent division by zero and add smoothing.
145+ Default is 0.0 (no regularization).
141146
142147 Returns
143148 -------
144149 numpy.ndarray
145150 Probability distributions for each response
146151 """
147152 if isinstance (reference_set_id , str ) and reference_set_id .lower () == "mean" :
148- # Calculate PDFs using mean over all reference sets
149- llm_response_pdfs = np .array (
153+ # Calculate PMFs using mean over all reference sets
154+ llm_response_pmfs = np .array (
150155 [
151- compute .response_embeddings_to_pdf (llm_response_matrix , M )
156+ compute .response_embeddings_to_pmf (llm_response_matrix , M , epsilon )
152157 for M in self .reference_matrices .values ()
153158 ]
154159 ).mean (axis = 0 )
155160 else :
156- # Calculate PDFs using specific reference set
161+ # Calculate PMFs using specific reference set
157162 M = self .reference_matrices [reference_set_id ]
158- llm_response_pdfs = compute .response_embeddings_to_pdf (
159- llm_response_matrix , M
163+ llm_response_pmfs = compute .response_embeddings_to_pmf (
164+ llm_response_matrix , M , epsilon
160165 )
161166
162167 if temperature != 1.0 :
163- llm_response_pdfs = np .array (
164- [compute .scale_pdf ( _pdf , temperature ) for _pdf in llm_response_pdfs ]
168+ llm_response_pmfs = np .array (
169+ [compute .scale_pmf ( _pmf , temperature ) for _pmf in llm_response_pmfs ]
165170 )
166171
167- return llm_response_pdfs
172+ return llm_response_pmfs
168173
169- def get_survey_response_pdf (self , response_pdfs ):
174+ def get_survey_response_pmf (self , response_pmfs ):
170175 """
171- Calculate the overall survey response PDF by averaging individual response PDFs .
176+ Calculate the overall survey response PMF by averaging individual response PMFs .
172177
173178 Parameters
174179 ----------
175- response_pdfs : numpy.ndarray
176- Matrix of individual response PDFs
180+ response_pmfs : numpy.ndarray
181+ Matrix of individual response PMFs
177182
178183 Returns
179184 -------
180185 numpy.ndarray
181- Average PDF representing the overall survey response
186+ Average PMF representing the overall survey response
182187 """
183- return response_pdfs .mean (axis = 0 )
188+ return response_pmfs .mean (axis = 0 )
184189
185- def get_survey_response_pdf_by_reference_set_id (
186- self , reference_set_id , llm_response_matrix , temperature = 1.0
190+ def get_survey_response_pmf_by_reference_set_id (
191+ self , reference_set_id , llm_response_matrix , temperature = 1.0 , epsilon = 0.0
187192 ):
188193 """
189- Get the survey response PDF using a specific reference set.
194+ Get the survey response PMF using a specific reference set.
190195
191196 Parameters
192197 ----------
@@ -196,14 +201,19 @@ def get_survey_response_pdf_by_reference_set_id(
196201 Matrix of LLM response embeddings
197202 Shape: (n_responses, n_dimensions)
198203 temperature : float, default = 1.0
199- Get scaled pdf With temperature T:
204+ Get scaled PMF with temperature T:
200205 ``p_new[i] ~ p_old[i]^(1/T)``.
206+ epsilon : float, optional
207+ Small regularization parameter to prevent division by zero and add smoothing.
208+ Default is 0.0 (no regularization).
201209
202210 Returns
203211 -------
204212 numpy.ndarray
205- Average PDF representing the overall survey response
213+ Average PMF representing the overall survey response
206214 """
207- return self .get_survey_response_pdf (
208- self .get_response_pdfs (reference_set_id , llm_response_matrix )
215+ return self .get_survey_response_pmf (
216+ self .get_response_pmfs (
217+ reference_set_id , llm_response_matrix , temperature , epsilon
218+ )
209219 )
0 commit comments