@@ -10,35 +10,44 @@ const assembly = new AssemblyAI({
10
10
export const transcription = functions . https . onRequest ( async ( req , res ) => {
11
11
if ( req . headers [ "x-maple-webhook" ] ) {
12
12
if ( req . body . status === "completed" ) {
13
+ // If we get a request with the right header and status, get the
14
+ // transcription from the assembly API.
13
15
const transcript = await assembly . transcripts . get ( req . body . transcript_id )
14
16
if ( transcript && transcript . webhook_auth ) {
15
- const maybeEventInDb = await db
17
+ // If there is a transcript and the transcript has an auth property,
18
+ // look for an event (aka Hearing) in the DB with a matching ID.
19
+ const maybeEventsInDb = await db
16
20
. collection ( "events" )
17
21
. where ( "videoAssemblyId" , "==" , transcript . id )
18
22
. get ( )
19
23
20
- if ( maybeEventInDb . docs . length ) {
21
- const authenticatedEventsInDb = maybeEventInDb . docs . filter (
22
- async e => {
23
- const hashedToken = sha256 ( String ( req . headers [ "x-maple-webhook" ] ) )
24
+ if ( maybeEventsInDb . docs . length ) {
25
+ // If we have a match, look for one that matches a hash of the token
26
+ // we gave Assembly. There should only be one of these, but Firestore
27
+ // gives us an array. If there is more than one member, something is
28
+ // wrong.
29
+ const authenticatedEventIds = [ ] as string [ ]
30
+ const hashedToken = sha256 ( String ( req . headers [ "x-maple-webhook" ] ) )
24
31
25
- const tokenInDb = await db
26
- . collection ( "events" )
27
- . doc ( e . id )
28
- . collection ( "private" )
29
- . doc ( "webhookAuth ")
30
- . get ( )
31
- const tokenInDbData = tokenInDb . data ( )
32
+ maybeEventsInDb . docs . forEach ( async doc => {
33
+ const tokenDocInDb = await db
34
+ . collection ( "events" )
35
+ . doc ( doc . id )
36
+ . collection ( "private ")
37
+ . doc ( "webhookAuth" )
38
+ . get ( )
32
39
33
- if ( tokenInDbData ) {
34
- return hashedToken === tokenInDbData . videoAssemblyWebhookToken
35
- }
36
- return false
40
+ const tokenDataInDb = tokenDocInDb . data ( ) ?. videoAssemblyWebhookToken
41
+
42
+ if ( hashedToken === tokenDataInDb ) {
43
+ authenticatedEventIds . push ( doc . id )
37
44
}
38
- )
45
+ } )
39
46
40
- const { id, text, audio_url, utterances, words } = transcript
41
- if ( authenticatedEventsInDb ) {
47
+ if ( authenticatedEventIds . length === 1 ) {
48
+ // If there is one authenticated event, pull out the parts we want to
49
+ // save and try to save them in the db.
50
+ const { id, text, audio_url, utterances } = transcript
42
51
try {
43
52
const transcriptionInDb = await db
44
53
. collection ( "transcriptions" )
@@ -51,25 +60,33 @@ export const transcription = functions.https.onRequest(async (req, res) => {
51
60
audio_url
52
61
} )
53
62
63
+ // Put each `utterance` in a separate doc in an utterances
64
+ // collection. We previously did the same for `words` but
65
+ // were worried about collection size and write times, since
66
+ // `words` can have tens of thousands of members.
54
67
if ( utterances ) {
55
68
const writer = db . bulkWriter ( )
56
69
for ( let utterance of utterances ) {
57
70
const { speaker, confidence, start, end, text } = utterance
58
71
59
72
writer . set (
60
- db . doc ( `/transcriptions/${ transcript . id } /utterances/` ) ,
73
+ db
74
+ . collection ( "transcriptions" )
75
+ . doc ( `${ transcript . id } ` )
76
+ . collection ( "utterances" )
77
+ . doc ( ) ,
61
78
{ speaker, confidence, start, end, text }
62
79
)
63
80
}
64
81
65
82
await writer . close ( )
66
83
}
67
84
68
- const batch = db . batch ( )
69
- authenticatedEventsInDb . forEach ( doc => {
70
- batch . update ( doc . ref , { [ "x-maple-webhook" ] : null } )
85
+ // Delete the hashed webhook auth token from our db now that
86
+ // we're done.
87
+ authenticatedEventIds . forEach ( async docId => {
88
+ await db . doc ( docId ) . set ( { [ "x-maple-webhook" ] : null } )
71
89
} )
72
- await batch . commit ( )
73
90
} catch ( error ) {
74
91
console . log ( error )
75
92
}
0 commit comments