Skip to content

Commit 4ce835b

Browse files
committed
fix(transcriptions): Only process mp4 videos. Previously, we could process the in-progress livestream before the video archive was ready, which our transcription flow doesn't handle properly. Now we only process a hearing after its video is finalized, so we can produce a complete transcription.
1 parent 9807b3f commit 4ce835b

File tree

3 files changed

+52
-3
lines changed

3 files changed

+52
-3
lines changed

functions/src/events/helpers.test.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { addDays, subDays } from "date-fns"
2-
import { withinCutoff } from "./helpers"
2+
import { isValidVideoUrl, withinCutoff } from "./helpers"
33

44
describe("withinCutoff true", () => {
55
beforeEach(() => {
@@ -37,3 +37,26 @@ describe("withinCutoff true", () => {
3737
expect(result).toEqual(false)
3838
})
3939
})
40+
41+
// Exercises isValidVideoUrl across accepted, missing, extension-less and
// unsupported inputs. Every case below only relies on behavior the helper's
// signature and allowlist check already promise.
describe("isValidVideoUrl", () => {
  it("should return true for a valid video URL", () => {
    const validUrl = "https://example.com/video.mp4"
    const result = isValidVideoUrl(validUrl)
    expect(result).toEqual(true)
  })

  // The helper lowercases the extension before the allowlist lookup.
  it("should return true regardless of the extension's letter case", () => {
    const result = isValidVideoUrl("https://example.com/VIDEO.MP4")
    expect(result).toEqual(true)
  })

  it("should return false for a missing URL", () => {
    const result = isValidVideoUrl(null)
    expect(result).toEqual(false)
  })

  // The parameter type also admits undefined, and "" is falsy like null.
  it("should return false for an undefined or empty URL", () => {
    expect(isValidVideoUrl(undefined)).toEqual(false)
    expect(isValidVideoUrl("")).toEqual(false)
  })

  it("should return false for a URL with no file format", () => {
    const result = isValidVideoUrl("https://example.com/video")
    expect(result).toEqual(false)
  })

  it("should return false for a URL with an unsupported format", () => {
    const result = isValidVideoUrl("https://example.com/video.m3u8")
    expect(result).toEqual(false)
  })
})

functions/src/events/helpers.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,32 @@
11
import { isAfter, subDays } from "date-fns"
22

3+
// File extensions accepted by isValidVideoUrl. Entries are written in
// lowercase and without the leading dot, because the candidate extension is
// lowercased before it is looked up here.
const VIDEO_FORMAT_ALLOWLIST = ["mp4"]
4+
35
/**
 * Reports whether `date` lies inside the window that ends now and starts
 * eight days earlier.
 *
 * @param date - The date to test.
 * @returns true when `date` is after the eight-day cutoff and not after the
 *   current time; false otherwise.
 */
export const withinCutoff = (date: Date) => {
  const currentTime = new Date()
  const earliestAllowed = subDays(currentTime, 8)

  // Too old: not after the cutoff.
  if (!isAfter(date, earliestAllowed)) {
    return false
  }

  // Anything later than the current time is outside the window.
  return !isAfter(date, currentTime)
}
11+
12+
/**
 * Decides whether a video URL looks like a finished video file in a
 * supported format.
 *
 * This isn't perfect because it relies on the file extension, but it's a
 * reasonable heuristic for now and should differentiate the livestreams we
 * don't want from the archived video we do want.
 *
 * The extension is read from the final path segment only, after removing any
 * query string or fragment, and is compared case-insensitively against
 * VIDEO_FORMAT_ALLOWLIST.
 *
 * @param url - Candidate video URL; may be null, undefined or empty.
 * @returns true when the URL's path ends in an allowlisted extension,
 *   false otherwise. Each rejection of a non-empty URL is logged.
 */
export const isValidVideoUrl = (url: string | null | undefined): boolean => {
  if (!url) return false

  // Drop the query string and fragment so "video.mp4?token=abc" is still
  // recognized and a dot inside the query cannot pose as an extension.
  const path = url.split(/[?#]/)[0] ?? ""
  // Only the last path segment can carry the extension; this keeps dots in
  // the host name (e.g. "example.com/video") from being mistaken for one.
  const fileName = path.split("/").pop() ?? ""
  const dotIndex = fileName.lastIndexOf(".")
  const fileFormat =
    dotIndex === -1 ? "" : fileName.slice(dotIndex + 1).toLowerCase()

  if (!fileFormat) {
    console.log(`Could not find file format for video URL: ${url}`)
    return false
  }

  if (!VIDEO_FORMAT_ALLOWLIST.includes(fileFormat)) {
    console.log(
      `Url ${url} has unsupported video format: ${fileFormat}. Supported formats: ${VIDEO_FORMAT_ALLOWLIST.join(
        ", "
      )}`
    )
    return false
  }

  return true
}

functions/src/events/scrapeEvents.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import {
1919
import { currentGeneralCourt } from "../shared"
2020
import { randomBytes } from "node:crypto"
2121
import { sha256 } from "js-sha256"
22-
import { withinCutoff } from "./helpers"
22+
import { isValidVideoUrl, withinCutoff } from "./helpers"
2323
import ffmpeg from "fluent-ffmpeg"
2424
import fs from "fs"
2525
abstract class EventScraper<ListItem, Event extends BaseEvent> {
@@ -238,7 +238,9 @@ const getHearingVideoUrl = async (EventId: number) => {
238238
dom.window.document.querySelectorAll("video source")
239239
if (maybeVideoSource.length && maybeVideoSource[0]) {
240240
const firstVideoSource = maybeVideoSource[0] as HTMLSourceElement
241-
return firstVideoSource.src
241+
const maybeVideoUrl = firstVideoSource.src
242+
243+
return isValidVideoUrl(maybeVideoUrl) ? maybeVideoUrl : null
242244
}
243245
}
244246
}

0 commit comments

Comments
 (0)