Skip to content

Commit 7a4ebfa

Browse files
committed
Add Forced Alignment Client and Models (#120)
* Introduced `ForcedAlignmentClient` for interacting with the Rev AI forced alignment API. * Added data models: `ForcedAlignmentJob`, `ForcedAlignmentResult`, `ElementAlignment`, and `Monologue` to handle alignment jobs. * Implemented methods for processing jobs in forced_alignment_client. * Created a module for forced alignment models to organize classes. * Updated generic_api_client get_result_object methods to optionally specify the result route to accommodate the forced alignment endpoint, which does NOT have a /result route. Instead it has a /transcript route as per [Get Forced Alignment Transcript By Id](https://docs.rev.ai/api/alignment/reference/#operation/GetAlignmentTranscriptById) Signed-off-by: Ahmed Mansour <[email protected]>
1 parent e36130e commit 7a4ebfa

File tree

5 files changed

+240
-4
lines changed

5 files changed

+240
-4
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# -*- coding: utf-8 -*-
2+
"""Client used for interacting with our forced alignment api"""
3+
4+
import json
5+
from .generic_api_client import GenericApiClient
6+
from .models.forced_alignment import ForcedAlignmentJob, ForcedAlignmentResult
7+
8+
9+
class ForcedAlignmentClient(GenericApiClient):
    """Rev AI forced alignment API client built on top of GenericApiClient."""

    # Name of the Rev AI api this client talks to
    api_name = 'alignment'

    # Version of the forced alignment api used by default
    api_version = 'v1'

    def __init__(self, access_token):
        """Create a forced alignment client.

        :param access_token: access token which authorizes all requests and links them to
                             your account. Generated on the settings page of your account
                             dashboard on Rev AI.
        """
        # Jobs and results returned by the api are parsed with the forced
        # alignment models' from_json constructors.
        GenericApiClient.__init__(
            self,
            access_token,
            self.api_name,
            self.api_version,
            ForcedAlignmentJob.from_json,
            ForcedAlignmentResult.from_json)

    def submit_job_url(
            self,
            source_config=None,
            source_transcript_config=None,
            transcript_text=None,
            metadata=None,
            delete_after_seconds=None,
            notification_config=None,
            language=None):
        """Submit a forced alignment job for media reachable by url.

        Exactly one of source_transcript_config and transcript_text has to be given.

        :param source_config: CustomerUrlData object containing url of the source media and
            optional authentication headers to use when accessing the source url
        :param source_transcript_config: CustomerUrlData object containing url of the
            transcript file and optional authentication headers to use when accessing
            the transcript url
        :param transcript_text: The text of the transcript to be aligned
            (no punctuation, just words)
        :param metadata: info to associate with the alignment job
        :param delete_after_seconds: number of seconds after job completion when job is
            auto-deleted
        :param notification_config: CustomerUrlData object containing the callback url to
            invoke on job completion as a webhook and optional authentication headers to
            use when calling the callback url
        :param language: Language code for the audio and transcript.
            One of: "en", "es", "fr"
        :returns: ForcedAlignmentJob object
        :raises: HTTPError
        :raises: ValueError if source_config is missing, or if the transcript is given
            in neither or both of its two forms
        """
        if not source_config:
            raise ValueError('source_config must be provided')

        has_transcript_url = bool(source_transcript_config)
        has_transcript_text = bool(transcript_text)
        if not has_transcript_url and not has_transcript_text:
            raise ValueError('Either source_transcript_config or transcript_text must be provided')
        if has_transcript_url and has_transcript_text:
            raise ValueError('Only one of source_transcript_config or transcript_text may be provided')

        # source_config passed the guard above, so it can be serialized directly.
        media_source = source_config.to_dict()
        transcript_source = None
        if has_transcript_url:
            transcript_source = source_transcript_config.to_dict()

        job_options = {
            'source_config': media_source,
            'source_transcript_config': transcript_source,
            'transcript_text': transcript_text,
            'language': language
        }
        # The third positional argument of _enhance_payload is intentionally None;
        # its meaning is defined by GenericApiClient (not visible from this class).
        payload = self._enhance_payload(
            job_options, metadata, None, delete_after_seconds, notification_config)

        return self._submit_job(payload)

    def get_result_json(self, id_):
        """Fetch the result of a forced alignment job as raw json.

        :param id_: id of job to be requested
        :returns: job result data as raw json
        :raises: HTTPError
        """
        # The result is requested from the job's 'transcript' route rather than
        # the generic client's default 'result' route.
        no_query_params = {}
        return self._get_result_json(id_, no_query_params, route='transcript')

    def get_result_object(self, id_):
        """Fetch the result of a forced alignment job as a ForcedAlignmentResult.

        :param id_: id of job to be requested
        :returns: job result data as ForcedAlignmentResult object
        :raises: HTTPError
        """
        # Same 'transcript' route as get_result_json; parsing is done by the base client.
        no_query_params = {}
        return self._get_result_object(id_, no_query_params, route='transcript')

src/rev_ai/generic_api_client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def get_list_of_jobs(self, limit=None, starting_after=None):
105105

106106
return [self.parse_job_info(job) for job in response.json()]
107107

108-
def _get_result_json(self, id_, params):
108+
def _get_result_json(self, id_, params, route='result'):
109109
"""Get the result of a job. This method is special in that it is intended to be hidden by
110110
the implementation this is done because python standard is to pass options individually
111111
instead of as an object and our true clients should match this standard
@@ -124,12 +124,12 @@ def _get_result_json(self, id_, params):
124124

125125
response = self._make_http_request(
126126
"GET",
127-
urljoin(self.base_url, 'jobs/{0}/result?{1}'.format(id_, '&'.join(query_params)))
127+
urljoin(self.base_url, 'jobs/{0}/{1}?{2}'.format(id_, route, '&'.join(query_params)))
128128
)
129129

130130
return response.json()
131131

132-
def _get_result_object(self, id_, params):
132+
def _get_result_object(self, id_, params, route='result'):
133133
"""Get the result of a job. This method is special in that it is intended to be hidden by
134134
the implementation this is done because python standard is to pass options individually
135135
instead of as an object and our true clients should match this standard
@@ -138,7 +138,7 @@ def _get_result_object(self, id_, params):
138138
:returns: job result data as object
139139
:raises: HTTPError
140140
"""
141-
return self.parse_job_result(self._get_result_json(id_, params))
141+
return self.parse_job_result(self._get_result_json(id_, params, route))
142142

143143
def delete_job(self, id_):
144144
"""Delete a specific job
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""Module containing models for Rev AI forced alignment"""
2+
3+
from .forced_alignment_job import ForcedAlignmentJob
4+
from .forced_alignment_result import ForcedAlignmentResult, Monologue, ElementAlignment
5+
6+
__all__ = ['ForcedAlignmentJob', 'ForcedAlignmentResult', 'Monologue', 'ElementAlignment']
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# -*- coding: utf-8 -*-
2+
"""Contains ForcedAlignmentJob dataclass"""
3+
4+
from typing import Dict, Any
5+
from ..asynchronous.job_status import JobStatus
6+
7+
8+
class ForcedAlignmentJob:
    """Information about a Rev AI forced alignment job."""

    def __init__(
            self, id_, created_on, status,
            completed_on=None,
            callback_url=None,
            metadata=None,
            media_url=None,
            failure=None,
            failure_detail=None,
            processed_duration_seconds=None,
            delete_after_seconds=None):
        """Dataclass containing information about a Rev AI forced alignment job

        :param id_: unique identifier for this job
        :param created_on: date and time at which this job was created
        :param status: current job status
        :param completed_on: date and time at which this job was completed
        :param callback_url: callback url associated with the job
            (presumably the webhook invoked on completion - confirm against the api docs)
        :param metadata: customer-provided metadata
        :param media_url: URL of the media to be aligned
        :param failure: details about job failure if status is "failed"
        :param failure_detail: additional failure information reported by the api
        :param processed_duration_seconds: duration value reported by the api
            (presumably the length of media processed - confirm against the api docs)
        :param delete_after_seconds: number of seconds after job completion when
            the job is auto-deleted
        """
        self.id = id_
        self.created_on = created_on
        self.status = status
        self.completed_on = completed_on
        self.callback_url = callback_url
        self.metadata = metadata
        self.media_url = media_url
        self.failure = failure
        self.failure_detail = failure_detail
        self.processed_duration_seconds = processed_duration_seconds
        self.delete_after_seconds = delete_after_seconds

    def __eq__(self, other):
        """Compare two jobs attribute by attribute.

        Returns NotImplemented for unrelated types so Python can try the
        reflected comparison; ``job == other`` still evaluates to False.
        """
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        return NotImplemented

    def __repr__(self):
        """Return a debugging representation listing every attribute."""
        attributes = ', '.join(
            f'{name}={value!r}' for name, value in self.__dict__.items())
        return f'{type(self).__name__}({attributes})'

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'ForcedAlignmentJob':
        """Alternate constructor used for parsing json

        'id', 'created_on' and 'status' are required keys; every other
        field falls back to None when absent.

        :param json: json dictionary to convert
        :returns: ForcedAlignmentJob
        :raises: KeyError if a required key is missing
        """
        return cls(
            id_=json['id'],
            created_on=json['created_on'],
            status=JobStatus.from_string(json['status']),
            completed_on=json.get('completed_on'),
            callback_url=json.get('callback_url'),
            metadata=json.get('metadata'),
            media_url=json.get('media_url'),
            failure=json.get('failure'),
            failure_detail=json.get('failure_detail'),
            processed_duration_seconds=json.get('processed_duration_seconds'),
            delete_after_seconds=json.get('delete_after_seconds')
        )
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# -*- coding: utf-8 -*-
2+
"""Contains ForcedAlignmentResult dataclass"""
3+
4+
from dataclasses import dataclass
5+
from typing import List, Dict, Any
6+
7+
8+
@dataclass
class ElementAlignment:
    """Dataclass containing information about an aligned word

    :param value: the word that was aligned
    :param ts: start time of the word in seconds
    :param end_ts: end time of the word in seconds
    :param type: type of element (always "text")
    """
    value: str
    ts: float
    end_ts: float
    type: str = "text"

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'ElementAlignment':
        """Creates an ElementAlignment from the given json dictionary

        Implemented as a classmethod so that subclasses build instances of
        their own type. Missing 'value', 'ts' or 'end_ts' keys yield None
        for that field; a missing 'type' defaults to "text".

        :param json: json dictionary to convert
        :returns: ElementAlignment
        """
        return cls(
            value=json.get('value'),
            ts=json.get('ts'),
            end_ts=json.get('end_ts'),
            type=json.get('type', 'text')
        )
35+
36+
37+
@dataclass
class Monologue:
    """Dataclass containing information about a monologue section

    :param speaker: speaker identifier
    :param elements: list of words in this monologue with timing information
    """
    speaker: int
    elements: List[ElementAlignment]

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'Monologue':
        """Creates a Monologue from the given json dictionary

        A missing 'speaker' defaults to 0. A missing or null 'elements'
        entry produces an empty element list.

        :param json: json dictionary to convert
        :returns: Monologue
        """
        # `or []` also covers an explicit null, which .get()'s default
        # argument would pass through as None and fail to iterate.
        raw_elements = json.get('elements') or []
        return cls(
            speaker=json.get('speaker', 0),
            elements=[ElementAlignment.from_json(element) for element in raw_elements]
        )
58+
59+
60+
@dataclass
class ForcedAlignmentResult:
    """Dataclass containing the result of a forced alignment job

    :param monologues: A Monologue object per speaker containing the words
                       they spoke with timing information
    """
    monologues: List[Monologue]

    @classmethod
    def from_json(cls, json: Dict[str, Any]) -> 'ForcedAlignmentResult':
        """Creates a ForcedAlignmentResult from the given json dictionary

        A missing or null 'monologues' entry produces an empty list.

        :param json: json dictionary to convert
        :returns: ForcedAlignmentResult
        """
        # `or []` also covers an explicit null, which .get()'s default
        # argument would pass through as None and fail to iterate.
        raw_monologues = json.get('monologues') or []
        return cls(
            monologues=[Monologue.from_json(monologue) for monologue in raw_monologues]
        )

0 commit comments

Comments
 (0)