|
| 1 | +import re |
| 2 | +import json |
| 3 | +from sentence_transformers import SentenceTransformer, util |
| 4 | +from evaluate import load |
| 5 | + |
| 6 | +try: |
| 7 | + from convokit.genai import get_llm_client |
| 8 | + |
| 9 | + GENAI_AVAILABLE = True |
| 10 | +except ImportError: |
| 11 | + GENAI_AVAILABLE = False |
| 12 | + |
| 13 | + |
class ConDynSBaselines:
    """A class providing baseline methods for computing conversation similarity to compare with ConDynS.

    This class provides various baseline methods for comparing conversations including
    BERTScore, cosine similarity using sentence embeddings, and GPT-based comparison methods.
    The baseline methods are used to compare with ConDynS.

    :param model_provider: The GenAI provider to use (e.g., "gpt", "gemini")
    :param config: Configuration object forwarded to :func:`get_llm_client` to construct the client
    :param model: Optional specific model name
    :param sentence_transformer_model: Sentence transformer model to use for embeddings (default: "all-MiniLM-L6-v2")
    :param device: Device to use for sentence transformer (default: "cpu")
    """

    def __init__(
        self,
        model_provider: str,
        config,
        model: str = None,
        sentence_transformer_model: str = "all-MiniLM-L6-v2",
        device: str = "cpu",
    ):
        """Initialize the ConDynSBaselines with specified models and configurations.

        :param model_provider: The GenAI provider to use (e.g., "gpt", "gemini")
        :param config: Configuration object forwarded to :func:`get_llm_client`
        :param model: Optional specific model name
        :param sentence_transformer_model: Sentence transformer model to use for embeddings
        :param device: Device to use for sentence transformer
        :raises ImportError: If required dependencies are not available
        """
        if not GENAI_AVAILABLE:
            raise ImportError(
                "GenAI dependencies not available. Please install via `pip install convokit[genai]`."
            )

        self.model_provider = model_provider
        self.model = model
        self.sentence_transformer_model = sentence_transformer_model
        self.device = device
        self.client = get_llm_client(model_provider, config, model=model)
        self.st_model = SentenceTransformer(sentence_transformer_model, device=device)
        self.util = util
        self.bertscore = load("bertscore")

    def get_bertscore(self, pred, ref):
        """Compute BERTScore between prediction and reference texts.

        Uses the BERTScore metric to evaluate semantic similarity between two texts.

        :param pred: Prediction text to evaluate
        :param ref: Reference text to compare against
        :return: BERTScore computation results
        """
        a = [pred]
        b = [ref]
        return self.bertscore.compute(
            predictions=a, references=b, model_type="distilbert-base-uncased"
        )

    def get_cosine_similarity(self, pred, ref):
        """Compute cosine similarity between two texts using sentence embeddings.

        Uses the SentenceTransformer model to generate embeddings and computes
        cosine similarity between them.

        :param pred: First text for comparison
        :param ref: Second text for comparison
        :return: Cosine similarity score between 0 and 1
        """
        # Encode both texts in one batch so they share tokenization/setup cost.
        embeddings = self.st_model.encode([pred, ref], convert_to_tensor=True)
        similarity = self.util.cos_sim(embeddings[0], embeddings[1])
        return similarity.item()

    def _parse_gpt_responses(self, response):
        """Parse and clean model responses containing JSON.

        Extracts JSON content from model responses that may be wrapped in
        markdown code fences (```json ... ```) and parses it into a dict.

        :param response: Raw response text from model
        :return: Parsed JSON data as dictionary
        :raises ValueError: If the response does not contain valid JSON
        """
        # Strip markdown code fences the model may wrap around the JSON payload.
        clean_json_str = re.sub(r"```json|```", "", response).strip()
        try:
            return json.loads(clean_json_str)
        except json.JSONDecodeError as e:
            # The original code printed a debug message and then hit an
            # UnboundLocalError on `return parsed_data`; fail loudly with a
            # descriptive, chained error instead.
            raise ValueError(f"Error decoding JSON for response: {response}") from e

    def get_gpt_compare_score(self, pred, ref, prompt):
        """Compare two texts using GPT model with a custom prompt.

        Sends a formatted prompt to GPT model to compare two texts and returns
        similarity score and reasoning.

        :param pred: First text for comparison
        :param ref: Second text for comparison
        :param prompt: Prompt template to use for comparison; must contain
            ``{pred}`` and ``{ref}`` placeholders and elicit a JSON object with
            ``sim_score`` and ``reason`` fields
        :return: Tuple of (similarity_score, reasoning)
        :raises ValueError: If the model response cannot be parsed as JSON
        :raises KeyError: If the parsed response lacks ``sim_score`` or ``reason``
        """
        gpt_prompt = prompt.format(pred=pred, ref=ref)
        response = self.client.generate(gpt_prompt)
        parsed_response = self._parse_gpt_responses(response)
        score = parsed_response["sim_score"]
        reason = parsed_response["reason"]
        return score, reason

    def get_naive_gpt_compare_score_SCDs(self, scd1, scd2):
        """Compare two Summary of Conversation Dynamics (SCD) using GPT.

        Compares two SCD summaries and rates their similarity based on persuasion
        trajectory and conversational dynamics, ignoring specific topics or claims.

        :param scd1: First SCD summary
        :param scd2: Second SCD summary
        :return: Tuple of (similarity_score, reasoning)
        """
        naive_gpt_compare_scd_prompt = """Compare the following two summary of conversation dynamics (SCD) of two online conversations, rate the similarity of the two conversations on a scale from 1 to 100, based on their persuasion trajectory reflected in the SCDs.

### **Key Aspects of Persuasion Trajectory**
- **Persuasion Strategies**: Logical reasoning, emotional appeals, rhetorical questions, citing authority, anecdotes, hypothetical scenarios, refuting counterarguments, shifting burden of proof, repetition, framing, social proof.
- **Interaction Dynamics**: Engagement patterns (e.g., single argument vs. back-and-forth), timing effects, persistence vs. resistance.
- **Response to Persuasion**: Agreement, concession, skepticism, counter-argument, disengagement, linguistic indicators (e.g., hedging, intensity, pronouns).
- **Trajectory of Persuasion**: Gradual shift, immediate agreement, persistent resistance, partial concession, reversal, stalemate.

### **Ignore**:
- **Do not consider specific topics, claims, or arguments.

### **Output Requirements**
Return a JSON object containing:
- `"sim_score"` (int): A similarity score between 0-100, representing how similar the conversations themselves are in **trajectory** based on the SCDs.
- `"reason"` (string, ≤30 words): A brief explanation of why the score was given, referencing key conversational dynamics.

### **Output Format (JSON)**
```json
{{
    "sim_score": <int>,
    "reason": "<brief explanation (≤30 words)>"
}}

### **Conversations**
Conversation 1 SCD:
{pred}

Conversation 2 SCD:
{ref}
"""
        score, reason = self.get_gpt_compare_score(scd1, scd2, naive_gpt_compare_scd_prompt)
        return score, reason

    def get_naive_gpt_compare_score_Transcripts(self, transcript1, transcript2):
        """Compare two conversation transcripts using GPT.

        Compares two conversation transcripts and rates their similarity based on
        conversational trajectory and dynamics, ignoring specific topics discussed.

        :param transcript1: First conversation transcript
        :param transcript2: Second conversation transcript
        :return: Tuple of (similarity_score, reasoning)
        """
        naive_gpt_compare_transcript_prompt = """Compare the following two online conversations and rate their similarity on a scale from 1 to 100, based on their trajectory.

### **Definition of Trajectory**
The trajectory of a conversation refers to its **dynamics**, including:
- **Changes in tone** (e.g., neutral to argumentative, formal to casual, sarcastic or sincere).
- **Patterns of interaction** (e.g., back-and-forth exchanges, long monologues, interruptions).
- **Conversation strategies** (e.g., persuasion, questioning, storytelling).
- **Order of the above trajectory events**

### **Ignore**:
- The topics discussed.
- Specific factual content.

### **Output Requirements**
Return a JSON object containing:
- `"sim_score"` (int): A similarity score between 0-100, representing how similar the conversations are in **trajectory**.
- `"reason"` (string, ≤30 words): A brief explanation of why the score was given, referencing key conversational dynamics.

### **Output Format (JSON)**
```json
{{
    "sim_score": <int>,
    "reason": "<brief explanation (≤30 words)>"
}}

### **Conversations**
Conversation 1:
{pred}

Conversation 2:
{ref}
"""
        score, reason = self.get_gpt_compare_score(
            transcript1, transcript2, naive_gpt_compare_transcript_prompt
        )
        return score, reason
0 commit comments