⌫ Remove fluff

ulfaslak · ulfaslak · commit 8e6834e61d6f · 2025-08-01T10:00:57.000+02:00
diff --git a/README.md b/README.md
@@ -67,7 +67,7 @@ The ESR methodology works by:
 
 - `EmbeddingsRater`: Main class implementing the ESR methodology
 - `response_embeddings_to_pdf()`: Core function for similarity-to-probability conversion
-- `scale_pdf()` and `scale_pdf_no_max_temp()`: Temperature scaling functions
+- `scale_pdf()`: Temperature scaling function
 
 ## Citation
 
diff --git a/embeddings_similarity_rating/__init__.py b/embeddings_similarity_rating/__init__.py
@@ -10,7 +10,7 @@
 
 from beartype.claw import beartype_this_package
 
-from .compute import response_embeddings_to_pdf, scale_pdf, scale_pdf_no_max_temp
+from .compute import response_embeddings_to_pdf, scale_pdf
 from .embeddings_rater import EmbeddingsRater
 
 __version__ = "1.0.0"
@@ -20,7 +20,6 @@
     "EmbeddingsRater",
     "response_embeddings_to_pdf",
     "scale_pdf",
-    "scale_pdf_no_max_temp",
 ]
 
 beartype_this_package()
diff --git a/embeddings_similarity_rating/compute.py b/embeddings_similarity_rating/compute.py
@@ -11,57 +11,12 @@
 The module is particularly useful for working with Likert scale responses and their
 embeddings, providing tools to analyze and transform the underlying probability
 distributions.
-
-Examples
---------
->>> x = np.arange(1,6)
->>> pdf = np.array([0.1,0.15,0.05,0.2,0.5])
->>> real_mean = 3.0
->>> T, scaled_pdf = get_optimal_temperature_mean(x, pdf, real_mean)
 """
 
 import numpy as np
-from scipy.optimize import minimize
-
-
-def cos_to_pdf(cos):
-    """
-    Convert cosine similarities to a probability density function (PDF).
-
-    Parameters
-    ----------
-    cos : array_like
-        Array of cosine similarity values
-
-    Returns
-    -------
-    numpy.ndarray
-        Normalized PDF where all values sum to 1
-    """
-    hist = np.array(cos) - np.min(cos)
-    return hist / hist.sum()
 
 
-def cos_sim(emb1, emb2):
-    """
-    Calculate cosine similarity between two embeddings.
-
-    Parameters
-    ----------
-    emb1 : array_like
-        First embedding vector
-    emb2 : array_like
-        Second embedding vector
-
-    Returns
-    -------
-    float
-        Cosine similarity score between 0 and 1
-    """
-    return (1 + cos_sim_pdf(emb1, emb2)) / 2
-
-
-def scale_pdf(pdf, temperature, max_temp=10):
+def scale_pdf(pdf, temperature, max_temp=np.inf):
     """
     Scale a PDF using temperature scaling.
 
@@ -72,7 +27,7 @@ def scale_pdf(pdf, temperature, max_temp=10):
     temperature : float
         Temperature parameter for scaling (0 to max_temp)
     max_temp : float, optional
-        Maximum temperature value, by default 10
+        Maximum temperature value, by default np.inf (i.e. no upper limit)
 
     Returns
     -------
@@ -99,187 +54,6 @@ def scale_pdf(pdf, temperature, max_temp=10):
     return hist / hist.sum()
 
 
-def scale_pdf_no_max_temp(pdf, temperature):
-    """Calls ``scale_pdf(pdf, temperature, max_temp=np.inf)``"""
-    return scale_pdf(pdf, temperature, max_temp=np.inf)
-
-
-def cos_sim_pdf(pdf1, pdf2):
-    """
-    Calculate cosine similarity between two PDFs.
-
-    Parameters
-    ----------
-    pdf1 : array_like
-        First probability density function
-    pdf2 : array_like
-        Second probability density function
-
-    Returns
-    -------
-    float
-        Cosine similarity between the PDFs
-    """
-    return pdf1.dot(pdf2) / np.linalg.norm(pdf1) / np.linalg.norm(pdf2)
-
-
-def KS_sim_pdf(pdf1, pdf2):
-    """
-    Calculate Kolmogorov-Smirnov similarity between two PDFs.
-
-    Parameters
-    ----------
-    pdf1 : array_like
-        First probability density function
-    pdf2 : array_like
-        Second probability density function
-
-    Returns
-    -------
-    float
-        KS similarity score between 0 and 1
-    """
-    return 1 - np.max(np.abs(np.cumsum(pdf1) - np.cumsum(pdf2)))
-
-
-def pdf_moment(pdf, x, m):
-    """
-    Calculate the m-th moment of a PDF.
-
-    Parameters
-    ----------
-    pdf : array_like
-        Probability density function
-    x : array_like
-        Values corresponding to the PDF
-    m : int
-        Order of the moment to calculate
-
-    Returns
-    -------
-    float
-        The m-th moment of the PDF
-    """
-    return pdf.dot(x**m)
-
-
-def mean(pdf, x):
-    """
-    Calculate the mean of a PDF.
-
-    Parameters
-    ----------
-    pdf : array_like
-        Probability density function
-    x : array_like
-        Values corresponding to the PDF
-
-    Returns
-    -------
-    float
-        Mean value of the PDF
-    """
-    return pdf_moment(pdf, x, m=1)
-
-
-def var(pdf, x):
-    """
-    Calculate the variance of a PDF.
-
-    Parameters
-    ----------
-    pdf : array_like
-        Probability density function
-    x : array_like
-        Values corresponding to the PDF
-
-    Returns
-    -------
-    float
-        Variance of the PDF
-    """
-    _x_ = mean(pdf, x)
-    _x2_ = pdf_moment(pdf, x, m=2)
-    return _x2_ - _x_**2
-
-
-def std(pdf, x):
-    """
-    Calculate the standard deviation of a PDF.
-
-    Parameters
-    ----------
-    pdf : array_like
-        Probability density function
-    x : array_like
-        Values corresponding to the PDF
-
-    Returns
-    -------
-    float
-        Standard deviation of the PDF
-    """
-    return np.sqrt(var(pdf, x))
-
-
-def get_optimal_temperature_mean(x, pdf, real_mean):
-    """
-    Find the optimal temperature that matches the mean of a scaled PDF to a target mean.
-
-    Parameters
-    ----------
-    x : array_like
-        Values corresponding to the PDF
-    pdf : array_like
-        Input probability density function
-    real_mean : float
-        Target mean value
-
-    Returns
-    -------
-    tuple
-        (optimal_temperature, scaled_pdf)
-    """
-
-    def _obj(T):
-        return (mean(scale_pdf(pdf, T), x) - real_mean) ** 2
-
-    T0 = 1.0
-    res = minimize(_obj, T0, bounds=[(0, 10.0)])
-
-    T = res.x[0]
-    pdf = scale_pdf(pdf, T)
-    return T, pdf
-
-
-def get_optimal_temperature_KS_sim(pdf, real_pdf):
-    """
-    Find the optimal temperature that maximizes KS similarity between scaled PDF and target PDF.
-
-    Parameters
-    ----------
-    pdf : array_like
-        Input probability density function
-    real_pdf : array_like
-        Target probability density function
-
-    Returns
-    -------
-    tuple
-        (optimal_temperature, scaled_pdf)
-    """
-
-    def _obj(T):
-        return -KS_sim_pdf(scale_pdf(pdf, T), real_pdf)
-
-    T0 = 1.0
-    res = minimize(_obj, T0, bounds=[(0, 10.0)])
-
-    T = res.x[0]
-    pdf = scale_pdf(pdf, T)
-    return T, pdf
-
-
 def response_embeddings_to_pdf(matrix_responses, matrix_likert_sentences):
     """
     Convert response embeddings and Likert sentence embeddings to a PDF.
@@ -314,13 +88,3 @@ def response_embeddings_to_pdf(matrix_responses, matrix_likert_sentences):
     pdf = cos / sum_per_row[:, None]
 
     return pdf
-
-
-if __name__ == "__main__":
-    # Example usage with test data
-    x = np.arange(1, 6)
-    pdf = np.array([0.1, 0.15, 0.05, 0.2, 0.5])
-    realpdf = np.array([0.1, 0.15, 0.5, 0.15, 0.1])
-    real_mean = 3.0
-    print(get_optimal_temperature_mean(x, pdf, real_mean))
-    print(get_optimal_temperature_KS_sim(pdf, real_mean))
diff --git a/embeddings_similarity_rating/embeddings_rater.py b/embeddings_similarity_rating/embeddings_rater.py
@@ -161,10 +161,7 @@ def get_response_pdfs(self, reference_set_id, llm_response_matrix, temperature=1
 
         if temperature != 1.0:
             llm_response_pdfs = np.array(
-                [
-                    compute.scale_pdf_no_max_temp(_pdf, temperature)
-                    for _pdf in llm_response_pdfs
-                ]
+                [compute.scale_pdf(_pdf, temperature) for _pdf in llm_response_pdfs]
             )
 
         return llm_response_pdfs
diff --git a/tests/test_compute.py b/tests/test_compute.py