Fix comments and update the Gemini prompt to request speaker identification

kendallwerts · kendallwerts · commit 2a1bf11482bf · 2025-04-08T10:20:52.000-05:00
diff --git a/notebooks/experiments/minutes_diarization/download_minutes.py b/notebooks/experiments/minutes_diarization/download_minutes.py
@@ -21,11 +21,11 @@ def download_minutes_pdfs():
         # Parse the HTML using selectolax
         tree = HTMLParser(response.content)
 
-        # Find all rows that contain filename divs
+        # Find all divs with class row
         rows = tree.css("div.row")
 
         for row in rows:
-            # Find the filename div in this row
+            # Find the first div with class fileName in this row; skip the row if there is none
             filename_div = row.css_first("div.fileName")
             if not filename_div:
                 continue
diff --git a/notebooks/experiments/minutes_diarization/process_min_diarization.py b/notebooks/experiments/minutes_diarization/process_min_diarization.py
@@ -39,7 +39,7 @@ def format_timestamp(seconds: float) -> str:
         secs = int(seconds % 60)
         return f"{hours:02d}:{minutes:02d}:{secs:02d}"
 
-    # Create formatted HTML output
+    # Copied from John's transcription creator to simplify the diarization. TODO: consider turning this into a function or using the VTT instead.
     speaker_lines = ["Meeting Script - Combined by Speaker"]
 
     current_speaker = None
@@ -99,7 +99,7 @@ def match_speakers_with_gemini(minutes_text, diarization):
             system_instruction=instruction,
         ),
     )
-    resp = chat.send_message("Show me all the market entities")
+    resp = chat.send_message("Show me speaker identification")
 
     result = resp.candidates[0].content.parts[0].text
     print(f"result: {result}")