Skip to content

Commit 59a68a4

Browse files
authored
Conversational Dynamics Similarity (ConDynS) and ConvoKit GenAI Tool (#288)
1 parent 5ec4530 commit 59a68a4

36 files changed

+10089
-5
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
[![license](https://img.shields.io/badge/license-MIT-green)](https://github.com/CornellNLP/ConvoKit/blob/master/LICENSE.md)
1010
[![Discord Community](https://img.shields.io/static/v1?logo=discord&style=flat&color=red&label=discord&message=community)](https://discord.gg/WMFqMWgz6P)
1111

12-
This toolkit contains tools to extract conversational features and analyze social phenomena in conversations, using a [single unified interface](https://convokit.cornell.edu/documentation/architecture.html) inspired by (and compatible with) scikit-learn. Several large [conversational datasets](https://github.com/CornellNLP/ConvoKit#datasets) are included together with scripts exemplifying the use of the toolkit on these datasets. The latest version is [3.5.0](https://github.com/CornellNLP/ConvoKit/releases/tag/v3.5.0) (released Oct. 15, 2025); follow the [project on GitHub](https://github.com/CornellNLP/ConvoKit) to keep track of updates.
12+
This toolkit contains tools to extract conversational features and analyze social phenomena in conversations, using a [single unified interface](https://convokit.cornell.edu/documentation/architecture.html) inspired by (and compatible with) scikit-learn. Several large [conversational datasets](https://github.com/CornellNLP/ConvoKit#datasets) are included together with scripts exemplifying the use of the toolkit on these datasets. The latest version is [3.6.0](https://github.com/CornellNLP/ConvoKit/releases/tag/v3.6.0) (released Oct. 25, 2025); follow the [project on GitHub](https://github.com/CornellNLP/ConvoKit) to keep track of updates.
1313

1414
Join our [Discord community](https://discord.gg/WMFqMWgz6P) to stay informed, connect with fellow developers, and be part of an engaging space where we share progress, discuss features, and tackle issues together.
1515

convokit/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
"utterance_likelihood": ".utterance_likelihood",
3333
"speaker_convo_helpers": ".speaker_convo_helpers",
3434
"politeness_collections": ".politeness_collections",
35+
"genai": ".genai",
36+
"convo_similarity": ".convo_similarity",
3537
"talktimesharing": ".talktimesharing",
3638
}
3739

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from .scd import SCD
2+
from .condyns import ConDynS
3+
from .naive_condyns import NaiveConDynS
4+
from .baseline import ConDynSBaselines
5+
from .utils import *
6+
7+
__all__ = [
8+
"SCD",
9+
"ConDynS",
10+
"NaiveConDynS",
11+
"ConDynSBaselines",
12+
]
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import re
2+
import json
3+
from sentence_transformers import SentenceTransformer, util
4+
from evaluate import load
5+
6+
try:
7+
from convokit.genai import get_llm_client
8+
9+
GENAI_AVAILABLE = True
10+
except ImportError:
11+
GENAI_AVAILABLE = False
12+
13+
14+
class ConDynSBaselines:
    """Baseline methods for computing conversation similarity, for comparison with ConDynS.

    The baselines are BERTScore, cosine similarity over sentence embeddings, and
    LLM-prompted comparison of either SCD summaries or raw transcripts.

    :param model_provider: The GenAI provider to use (e.g., "gpt", "gemini")
    :param config: Configuration object forwarded unchanged to ``get_llm_client``
    :param model: Optional specific model name
    :param sentence_transformer_model: Sentence transformer model to use for embeddings (default: "all-MiniLM-L6-v2")
    :param device: Device to use for sentence transformer (default: "cpu")
    """

    def __init__(
        self,
        model_provider: str,
        config,
        model: str = None,
        sentence_transformer_model: str = "all-MiniLM-L6-v2",
        device: str = "cpu",
    ):
        """Initialize the ConDynSBaselines with specified models and configurations.

        :param model_provider: The GenAI provider to use (e.g., "gpt", "gemini")
        :param config: Configuration object forwarded unchanged to ``get_llm_client``
        :param model: Optional specific model name
        :param sentence_transformer_model: Sentence transformer model to use for embeddings
        :param device: Device to use for sentence transformer
        :raises ImportError: If required dependencies are not available
        """
        if not GENAI_AVAILABLE:
            raise ImportError(
                "GenAI dependencies not available. Please install via `pip install convokit[genai]`."
            )

        self.model_provider = model_provider
        self.model = model
        self.sentence_transformer_model = sentence_transformer_model
        self.device = device
        self.client = get_llm_client(model_provider, config, model=model)
        self.st_model = SentenceTransformer(sentence_transformer_model, device=device)
        # Kept as an instance attribute so existing callers relying on `.util` keep working.
        self.util = util
        self.bertscore = load("bertscore")

    def get_bertscore(self, pred, ref):
        """Compute BERTScore between a prediction text and a reference text.

        Both texts are wrapped in single-element lists because the metric is
        called with lists of predictions and references.

        :param pred: Prediction text to evaluate
        :param ref: Reference text to compare against
        :return: Whatever the loaded ``bertscore`` metric's ``compute`` returns
            for this single pair, scored with "distilbert-base-uncased"
        """
        return self.bertscore.compute(
            predictions=[pred], references=[ref], model_type="distilbert-base-uncased"
        )

    def get_cosine_similarity(self, pred, ref):
        """Compute cosine similarity between two texts using sentence embeddings.

        Both texts are encoded in one batch by the SentenceTransformer model and
        the cosine of the angle between the two embeddings is returned.

        :param pred: First text for comparison
        :param ref: Second text for comparison
        :return: Cosine similarity as a Python float; mathematically in [-1, 1]
        """
        embeddings = self.st_model.encode([pred, ref], convert_to_tensor=True)
        similarity = self.util.cos_sim(embeddings[0], embeddings[1])
        return similarity.item()

    def _parse_gpt_responses(self, response):
        """Parse a model response that is expected to contain JSON.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON on its own, the span from the first ``{`` to the last ``}``
        is tried, so a JSON object wrapped in explanatory prose is still
        recovered.

        :param response: Raw response text from the model, or an object exposing
            the text through a ``text`` attribute
        :return: Parsed JSON data (a dictionary for the prompts used in this class)
        :raises TypeError: If no response text can be obtained as ``str``
        :raises ValueError: If no JSON can be decoded from the response
        """
        # NOTE(review): the GenAI client may return a response object rather than a
        # plain string -- presumably exposing `.text`; confirm against convokit.genai.
        # Plain strings have no `.text`, so they pass through unchanged.
        text = getattr(response, "text", response)
        if not isinstance(text, str):
            raise TypeError(
                f"Expected model response text as str, got {type(text).__name__}"
            )

        clean_json_str = re.sub(r"```json|```", "", text).strip()
        try:
            return json.loads(clean_json_str)
        except json.JSONDecodeError as first_error:
            # Models sometimes surround the JSON with prose; retry on the
            # outermost brace-delimited span before giving up.
            start = clean_json_str.find("{")
            end = clean_json_str.rfind("}")
            if 0 <= start < end:
                try:
                    return json.loads(clean_json_str[start : end + 1])
                except json.JSONDecodeError:
                    pass  # fall through to the explicit error below
            # Previously this path printed a message and then failed with an
            # UnboundLocalError; raise a descriptive error instead.
            raise ValueError(f"Error decoding JSON for response: {text}") from first_error

    def get_gpt_compare_score(self, pred, ref, prompt):
        """Compare two texts using the configured LLM and a custom prompt.

        The prompt template is filled via ``str.format`` with the ``pred`` and
        ``ref`` fields, sent to the client, and the JSON reply is parsed.

        :param pred: First text for comparison
        :param ref: Second text for comparison
        :param prompt: Prompt template containing ``{pred}`` and ``{ref}``
            placeholders; literal braces must be doubled
        :return: Tuple of (similarity_score, reasoning)
        :raises ValueError: If the model reply cannot be parsed as JSON
        :raises KeyError: If the parsed reply lacks "sim_score" or "reason"
        """
        gpt_prompt = prompt.format(pred=pred, ref=ref)
        response = self.client.generate(gpt_prompt)
        parsed_response = self._parse_gpt_responses(response)
        return parsed_response["sim_score"], parsed_response["reason"]

    def get_naive_gpt_compare_score_SCDs(self, scd1, scd2):
        """Compare two Summaries of Conversation Dynamics (SCDs) using the LLM.

        The prompt asks the model to rate similarity based on persuasion
        trajectory and conversational dynamics, ignoring specific topics or claims.

        :param scd1: First SCD summary
        :param scd2: Second SCD summary
        :return: Tuple of (similarity_score, reasoning)
        """
        # The prompt text is kept verbatim so baseline results stay comparable.
        naive_gpt_compare_scd_prompt = """Compare the following two summary of conversation dynamics (SCD) of two online conversations, rate the similarity of the two conversations on a scale from 1 to 100, based on their persuasion trajectory reflected in the SCDs.

### **Key Aspects of Persuasion Trajectory**
- **Persuasion Strategies**: Logical reasoning, emotional appeals, rhetorical questions, citing authority, anecdotes, hypothetical scenarios, refuting counterarguments, shifting burden of proof, repetition, framing, social proof.
- **Interaction Dynamics**: Engagement patterns (e.g., single argument vs. back-and-forth), timing effects, persistence vs. resistance.
- **Response to Persuasion**: Agreement, concession, skepticism, counter-argument, disengagement, linguistic indicators (e.g., hedging, intensity, pronouns).
- **Trajectory of Persuasion**: Gradual shift, immediate agreement, persistent resistance, partial concession, reversal, stalemate.

### **Ignore**:
- **Do not consider specific topics, claims, or arguments.

### **Output Requirements**
Return a JSON object containing:
- `"sim_score"` (int): A similarity score between 0-100, representing how similar the conversations themselves are in **trajectory** based on the SCDs.
- `"reason"` (string, ≤30 words): A brief explanation of why the score was given, referencing key conversational dynamics.

### **Output Format (JSON)**
```json
{{
"sim_score": <int>,
"reason": "<brief explanation (≤30 words)>"
}}

### **Conversations**
Conversation 1 SCD:
{pred}

Conversation 2 SCD:
{ref}
"""
        return self.get_gpt_compare_score(scd1, scd2, naive_gpt_compare_scd_prompt)

    def get_naive_gpt_compare_score_Transcripts(self, transcript1, transcript2):
        """Compare two conversation transcripts using the LLM.

        The prompt asks the model to rate similarity based on conversational
        trajectory and dynamics, ignoring the specific topics discussed.

        :param transcript1: First conversation transcript
        :param transcript2: Second conversation transcript
        :return: Tuple of (similarity_score, reasoning)
        """
        # The prompt text is kept verbatim so baseline results stay comparable.
        naive_gpt_compare_transcript_prompt = """Compare the following two online conversations and rate their similarity on a scale from 1 to 100, based on their trajectory.

### **Definition of Trajectory**
The trajectory of a conversation refers to its **dynamics**, including:
- **Changes in tone** (e.g., neutral to argumentative, formal to casual, sarcastic or sincere).
- **Patterns of interaction** (e.g., back-and-forth exchanges, long monologues, interruptions).
- **Conversation strategies** (e.g., persuasion, questioning, storytelling).
- **Order of the above trajectory events**

### **Ignore**:
- The topics discussed.
- Specific factual content.

### **Output Requirements**
Return a JSON object containing:
- `"sim_score"` (int): A similarity score between 0-100, representing how similar the conversations are in **trajectory**.
- `"reason"` (string, ≤30 words): A brief explanation of why the score was given, referencing key conversational dynamics.

### **Output Format (JSON)**
```json
{{
"sim_score": <int>,
"reason": "<brief explanation (≤30 words)>"
}}

### **Conversations**
Conversation 1:
{pred}

Conversation 2:
{ref}
"""
        return self.get_gpt_compare_score(
            transcript1, transcript2, naive_gpt_compare_transcript_prompt
        )

0 commit comments

Comments
 (0)