githubnext · Copilot · Jun 23, 2025 · Jun 23, 2025 · Jun 23, 2025 · Jun 23, 2025
diff --git a/.github/workflows/genai-video-slide-deck-annotator.yml b/.github/workflows/genai-video-slide-deck-annotator.yml
@@ -0,0 +1,36 @@
+name: genai video slide deck annotator
+on:
+  issues:
+    types: [opened, edited]
+permissions:
+    contents: read
+    issues: write
+    models: read
+concurrency:
+    group: ${{ github.workflow }}-${{ github.event.issue.number || github.ref }} # per issue: github.ref is the default branch for every issues event
+    cancel-in-progress: true
+jobs:
+  genai-video-slide-deck-analyze:
+    runs-on: ubuntu-latest
+    services:
+      whisper:
+        image: onerahmet/openai-whisper-asr-webservice:latest
+        env:
+          ASR_MODEL: base
+          ASR_ENGINE: openai_whisper
+        ports:
+          - 9000:9000
+        options: >-
+          --health-cmd "curl -f http://localhost:9000/docs || exit 1"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+          --health-start-period 20s
+    steps:
+      - uses: actions/checkout@v4
+      - uses: pelikhan/action-genai-video-issue-analyzer@main
+        with:
+          script: action-video-slide-deck-annotator
+          github_issue: ${{ github.event.issue.number }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          instructions: "Analyze the video frames to detect slide transitions in a presentation. Focus on identifying significant visual changes that indicate when slides change, ignore minor changes like cursor movement or highlighting. Generate timestamps with confidence scores for each detected transition."
diff --git a/Dockerfile b/Dockerfile
@@ -15,8 +15,11 @@ WORKDIR /genaiscript/action
 # Copy source code
 COPY . .
 
+# Make entrypoint script executable
+RUN chmod +x entrypoint.sh
+
 # Install dependencies
 RUN npm ci
 
 # GitHub Action forces the WORKDIR to GITHUB_WORKSPACE 
-ENTRYPOINT ["npm", "--prefix", "/genaiscript/action", "start"]
+ENTRYPOINT ["/genaiscript/action/entrypoint.sh"]
diff --git a/README.md b/README.md
@@ -3,8 +3,16 @@
 This GitHub Action runs all video assets in an issue body through a LLM model to analyze the content.
 The default behavior is to summarize and extract task items but this can be customized through the `prompt` input.
 
+**New**: The action now supports slide deck analysis with the `action-video-slide-deck-annotator` script that detects slide transitions and generates timestamps for presentations.
+
+## Scripts Available
+
+- **action-video-issue-analyzer** (default): Analyzes videos for general content summary and task extraction
+- **action-video-slide-deck-annotator**: Detects slide transitions in presentation videos and generates structured timestamps
+
 ## Inputs
 
+- `script`: The script to run (action-video-issue-analyzer or action-video-slide-deck-annotator). **(optional, defaults to action-video-issue-analyzer)**
 - `github_token`: GitHub token with `models: read` permission at least. **(required)**
 - `instructions`: Custom prompt to use for the LLM model. If not provided, a default prompt will be used.
 - `github_issue`: The issue number to analyze. Typically this variable is inferred from the event context.
@@ -38,6 +46,47 @@ It will launch a whisper service in a container that can be used by genaiscript.
           github_token: ${{ secrets.GITHUB_TOKEN }}
 ```
 
+## Slide Deck Annotator Usage
+
+To use the slide deck annotator for detecting slide transitions in presentation videos:
+
+```yaml
+    steps:
+      - uses: actions/checkout@v4
+      - uses: pelikhan/action-genai-video-issue-analyzer@v0
+        with:
+          script: action-video-slide-deck-annotator
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+```
+
+The slide deck annotator will output structured JSON with:
+- Video duration
+- Slide transition timestamps with confidence scores
+- Recommended viewing segments for each slide (2 minutes, or shorter when the next slide starts sooner)
+
+Example output:
+```json
+{
+  "video_duration": "01:23:45",
+  "slide_transitions": [
+    {
+      "timestamp": "00:02:15",
+      "confidence": 0.95,
+      "slide_number": 1,
+      "description": "Title slide to agenda"
+    }
+  ],
+  "recommended_segments": [
+    {
+      "start": "00:00:00",
+      "end": "00:02:00", 
+      "slide": 1,
+      "description": "First 2 minutes of title slide"
+    }
+  ]
+}
+```
+
 ## Example
 
 Save the following in `.github/workflows/genai-video-issue-analyzer.yml` file:

diff --git a/action.yml b/action.yml
@@ -1,6 +1,10 @@
 name: action-genai-video-issue-analyzer
 description: Analyzes videos upload as assets
 inputs:
+  script:
+    description: The script to run (action-video-issue-analyzer or action-video-slide-deck-annotator).
+    required: false
+    default: action-video-issue-analyzer
   instructions:
     description: Custom prompting instructions for each video.
     required: false

diff --git a/entrypoint.sh b/entrypoint.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+# Set the script name from the input parameter, defaulting to action-video-issue-analyzer
+export SCRIPT_NAME="${INPUT_SCRIPT:-action-video-issue-analyzer}"
+
+# Set the whisper API base
+export WHISPERASR_API_BASE=http://whisper:9000
+
+# Run genaiscript from the action directory (abort if it is missing); exec makes npx replace this shell
+cd /genaiscript/action || exit 1
+exec npx genaiscript run "$SCRIPT_NAME" --github-workspace --pull-request-comment --no-run-trace --no-output-trace
diff --git a/genaisrc/action-video-slide-deck-annotator.genai.mts b/genaisrc/action-video-slide-deck-annotator.genai.mts
@@ -0,0 +1,190 @@
+script({
+  title: "Analyzes videos to detect slide transitions and generate timestamps",
+  accept: "none",
+  parameters: {
+    instructions: {
+      type: "string",
+      description:
+        "Custom prompting instructions for slide transition detection.",
+      default:
+        "Analyze the video frames to detect slide transitions in a presentation. Focus on identifying significant visual changes that indicate when slides change, ignore minor changes like cursor movement or highlighting. Generate timestamps with confidence scores for each detected transition.",
+    },
+  },
+});
+
+const { dbg, output, vars } = env;
+const issue = await github.getIssue();
+if (!issue)
+  throw new Error(
+    "No issue found in the context. This action requires an issue to be present.",
+  );
+const { instructions } = vars as { instructions: string };
+if (!instructions)
+  throw new Error(
+    "No instructions provided. Please provide instructions to process the video.",
+  );
+
+// Collect the distinct attachment URLs that sit alone on a line of the issue body.
+// The body is null for an issue without a description, so fall back to an empty string.
+const RX = /^https:\/\/github\.com\/user-attachments\/assets\/.+$/gim;
+const assetLinks = [...new Set((issue.body ?? "").match(RX) ?? [])];
+if (assetLinks.length === 0)
+  cancel("No video assets found in the issue body, nothing to do.");
+
+dbg(`issue: %s`, issue.title);
+
+for (const assetLink of assetLinks) await processAssetLink(assetLink);
+
+/** Downloads one issue attachment and, when its content type is video/*, saves it and runs processVideo on it. */
+async function processAssetLink(assetLink: string) {
+  output.heading(3, assetLink);
+  dbg(assetLink);
+  const downloadUrl = await github.resolveAssetUrl(assetLink);
+  dbg(`download url: %s`, downloadUrl); // logged before the request so a failing fetch stays traceable
+  const res = await fetch(downloadUrl, { method: "GET" });
+  dbg(`headers: %O`, res.headers);
+  if (!res.ok)
+    throw new Error(
+      `Failed to download asset from ${downloadUrl}: ${res.status} ${res.statusText}`,
+    );
+  const contentType = res.headers.get("content-type") || "";
+  if (!/^video\//i.test(contentType)) {
+    await res.body?.cancel(); // the body is never read on this path; release it
+    output.p(`Asset is not a video file, skipping`);
+    return;
+  }
+
+  // save and cache
+  const buffer = await res.arrayBuffer();
+  dbg(`size`, `${(buffer.byteLength / 1e6) | 0}Mb`);
+  const filename = await workspace.writeCached(buffer, { scope: "run" });
+  dbg(`filename`, filename);
+  await processVideo(filename);
+}
+
+/** Transcribes the video, extracts frames, prompts the vision model for slide transitions as JSON and reports them. */
+async function processVideo(filename: string) {
+  const transcript = await transcribe(filename, {
+    model: "whisperasr:default",
+    cache: true,
+  });
+  if (!transcript) {
+    output.error(`no transcript found for video ${filename}.`);
+  }
+
+  // Extract frames for slide transition detection
+  const frames = await ffmpeg.extractFrames(filename, {
+    transcript,
+  });
+
+  const { text, error } = await runPrompt(
+    (ctx) => {
+      ctx.def("TRANSCRIPT", transcript?.srt, { ignoreEmpty: true }); // ignore silent videos
+      ctx.defImages(frames, { detail: "high", sliceSample: 80 }); // higher detail for slide detection
+      ctx.$`${instructions}
+
+## Analysis Instructions
+
+You are analyzing a video of a slide deck presentation. Your task is to:
+
+1. **Detect Slide Transitions**: Identify when the content significantly changes between frames, indicating a new slide
+2. **Filter Noise**: Ignore minor changes like cursor movement, highlighting, or small animations
+3. **Generate Timestamps**: Provide accurate timestamps for each transition
+4. **Assess Confidence**: Rate your confidence in each detection (0.0 to 1.0)
+5. **Create Viewing Segments**: Generate recommended 2-minute viewing segments for each slide
+
+## Output Format
+
+Respond with a valid JSON object in the following format:
+
+\`\`\`json
+{
+  "video_duration": "HH:MM:SS",
+  "slide_transitions": [
+    {
+      "timestamp": "HH:MM:SS",
+      "confidence": 0.95,
+      "slide_number": 1,
+      "description": "Brief description of the transition"
+    }
+  ],
+  "recommended_segments": [
+    {
+      "start": "HH:MM:SS", 
+      "end": "HH:MM:SS",
+      "slide": 1,
+      "description": "First 2 minutes of slide content"
+    }
+  ]
+}
+\`\`\`
+
+## Key Guidelines
+
+- Focus on major visual changes that clearly indicate slide transitions
+- Confidence scores should reflect how certain you are about the transition
+- Slide numbers should increment sequentially starting from 1
+- Recommended segments should be exactly 2 minutes or until the next slide transition
+- Use the transcript to help understand content changes when visual changes are ambiguous
+- If frames show the same slide content, do not mark as a transition
+- Look for changes in slide titles, bullet points, images, charts, or overall layout
+
+Analyze the provided frames and transcript to detect slide transitions.`.role(
+        "system",
+      );
+    },
+    {
+      systemSafety: true,
+      model: "vision",
+      responseType: "json",
+      label: `analyze slide transitions ${filename}`,
+    },
+  );
+
+  if (error) {
+    output.error(error?.message);
+    return;
+  }
+  try {
+    // The prompt shows the schema inside a ```json fence, so accept a reply that is fenced the same way.
+    const raw = (text ?? "").trim();
+    const fenced = /^```(?:json)?\s*\n([\s\S]*?)\n?```$/i.exec(raw);
+    const analysisResult = JSON.parse(fenced ? fenced[1] : raw);
+    // Model output is untrusted: only iterate fields that really are arrays.
+    const asArray = (value: unknown): any[] => (Array.isArray(value) ? value : []);
+    const transitions = asArray(analysisResult?.slide_transitions);
+    const segments = asArray(analysisResult?.recommended_segments);
+
+    // Display results in a formatted way
+    output.heading(4, "Slide Transition Analysis Results");
+    output.code(JSON.stringify(analysisResult, null, 2), "json");
+    if (transitions.length === 0) {
+      output.p("No slide transitions detected in this video.");
+      return;
+    }
+
+    output.heading(5, "Summary");
+    output.p(
+      `Found ${transitions.length} slide transitions in video duration: ${analysisResult.video_duration}`,
+    );
+
+    output.heading(5, "Detected Transitions");
+    for (const transition of transitions) {
+      output.p(
+        `**Slide ${transition.slide_number}** at [${transition.timestamp}] (confidence: ${transition.confidence}) - ${transition.description}`,
+      );
+    }
+
+    if (segments.length > 0) {
+      output.heading(5, "Recommended Viewing Segments");
+      for (const segment of segments) {
+        output.p(
+          `**Slide ${segment.slide}**: [${segment.start}] - [${segment.end}] - ${segment.description}`,
+        );
+      }
+    }
+  } catch (parseError) {
+    output.error(`Failed to parse JSON response: ${parseError.message}`);
+    output.heading(4, "Raw Response");
+    output.appendContent(text);
+  }
+}