Skip to content

Commit d0a57d1

Browse files
committed
Add support for G.722 audio
The G.722 codec is "special" because the clock rate is 8kHz even though the sampling rate is 16kHz.
1 parent eeb29b7 commit d0a57d1

File tree

5 files changed

+208
-7
lines changed

5 files changed

+208
-7
lines changed

src/aiortc/codecs/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,16 @@
1111
)
1212
from .base import Decoder, Encoder
1313
from .g711 import PcmaDecoder, PcmaEncoder, PcmuDecoder, PcmuEncoder
14+
from .g722 import G722Decoder, G722Encoder
1415
from .h264 import H264Decoder, H264Encoder, h264_depayload
1516
from .opus import OpusDecoder, OpusEncoder
1617
from .vpx import Vp8Decoder, Vp8Encoder, vp8_depayload
1718

19+
# The clockrate for G.722 is 8kHz even though the sampling rate is 16kHz.
20+
# See https://datatracker.ietf.org/doc/html/rfc3551#section-4.5.2
21+
G722_CODEC = RTCRtpCodecParameters(
22+
mimeType="audio/G722", clockRate=8000, channels=1, payloadType=9
23+
)
1824
PCMU_CODEC = RTCRtpCodecParameters(
1925
mimeType="audio/PCMU", clockRate=8000, channels=1, payloadType=0
2026
)
@@ -27,6 +33,7 @@
2733
RTCRtpCodecParameters(
2834
mimeType="audio/opus", clockRate=48000, channels=2, payloadType=96
2935
),
36+
G722_CODEC,
3037
PCMU_CODEC,
3138
PCMA_CODEC,
3239
],
@@ -141,7 +148,9 @@ def get_capabilities(kind: str) -> RTCRtpCapabilities:
141148
def get_decoder(codec: RTCRtpCodecParameters) -> Decoder:
142149
mimeType = codec.mimeType.lower()
143150

144-
if mimeType == "audio/opus":
151+
if mimeType == "audio/g722":
152+
return G722Decoder()
153+
elif mimeType == "audio/opus":
145154
return OpusDecoder()
146155
elif mimeType == "audio/pcma":
147156
return PcmaDecoder()
@@ -158,7 +167,9 @@ def get_decoder(codec: RTCRtpCodecParameters) -> Decoder:
158167
def get_encoder(codec: RTCRtpCodecParameters) -> Encoder:
159168
mimeType = codec.mimeType.lower()
160169

161-
if mimeType == "audio/opus":
170+
if mimeType == "audio/g722":
171+
return G722Encoder()
172+
elif mimeType == "audio/opus":
162173
return OpusEncoder()
163174
elif mimeType == "audio/pcma":
164175
return PcmaEncoder()

src/aiortc/codecs/g722.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import fractions
2+
from typing import cast
3+
4+
from av import AudioFrame, AudioResampler, CodecContext
5+
from av.frame import Frame
6+
from av.packet import Packet
7+
8+
from ..jitterbuffer import JitterFrame
9+
from ..mediastreams import convert_timebase
10+
from .base import Decoder, Encoder
11+
12+
# Audio is handled at 16 kHz; SAMPLE_WIDTH is presumably the size in bytes of
# one "s16" sample (it is not referenced elsewhere in this module).
SAMPLE_RATE = 16000
SAMPLE_WIDTH = 2
# Frame size requested from the resampler: 320 samples at 16 kHz is 20 ms.
SAMPLES_PER_FRAME = 320
# RTP timestamps for G.722 tick at 8 kHz even though the audio is 16 kHz.
CLOCK_BASE = fractions.Fraction(1, 8000)
# Time base of the audio frames and packets exchanged with the codec.
TIME_BASE = fractions.Fraction(1, SAMPLE_RATE)
17+
18+
19+
class G722Decoder(Decoder):
    """Decode G.722 payloads into 16 kHz mono ``s16`` audio frames."""

    def __init__(self) -> None:
        # Configure the decoder for the only format G.722 produces here.
        self.codec = CodecContext.create("g722", "r")
        self.codec.format = "s16"
        self.codec.layout = "mono"
        self.codec.sample_rate = SAMPLE_RATE

    def decode(self, encoded_frame: JitterFrame) -> list[Frame]:
        """
        Decode one jitter-buffer frame.

        :param encoded_frame: The G.722 payload and its timestamp, expressed
            in the codec's 8 kHz RTP clock.
        :return: The decoded frames, with ``pts`` expressed in `TIME_BASE`
            (1/16000) units.
        """
        packet = Packet(encoded_frame.data)
        # Even though the sample rate is 16kHz, the clockrate is defined as
        # 8kHz. The packet is labelled with a 1/16000 time base, so the RTP
        # timestamp must be doubled to stay consistent with it (this mirrors
        # the division by 2 performed when encoding).
        packet.pts = encoded_frame.timestamp * 2
        packet.time_base = TIME_BASE
        return cast(list[Frame], self.codec.decode(packet))
31+
32+
33+
class G722Encoder(Encoder):
    """Encode ``s16`` mono or stereo audio into 16 kHz mono G.722 payloads."""

    def __init__(self) -> None:
        encoder = CodecContext.create("g722", "w")
        encoder.format = "s16"
        encoder.layout = "mono"
        encoder.sample_rate = SAMPLE_RATE
        encoder.time_base = TIME_BASE
        self.codec = encoder

        # pts of the very first packet produced; timestamps are relative to it.
        self.first_pts = None

        # Create our own resampler to control the frame size.
        self.resampler = AudioResampler(
            format="s16",
            layout="mono",
            rate=SAMPLE_RATE,
            frame_size=SAMPLES_PER_FRAME,
        )

    def encode(
        self, frame: Frame, force_keyframe: bool = False
    ) -> tuple[list[bytes], int]:
        """
        Resample and encode an audio frame.

        :param frame: An ``s16`` audio frame with a mono or stereo layout.
        :param force_keyframe: Not used by this encoder.
        :return: The encoded payloads and the timestamp of the first one in
            8 kHz clock units, relative to the first packet ever produced.
            When the resampler is still buffering, ``([], None)`` is returned.
        """
        assert isinstance(frame, AudioFrame)
        assert frame.format.name == "s16"
        assert frame.layout.name in ("mono", "stereo")

        # Send frame through resampler and encoder.
        encoded = []
        for resampled in self.resampler.resample(frame):
            encoded.extend(self.codec.encode(resampled))

        if not encoded:
            # No packets were returned due to buffering.
            return [], None

        # Packets were returned.
        head_pts = encoded[0].pts
        if self.first_pts is None:
            self.first_pts = head_pts
        # Even though the sample rate is 16kHz, the clockrate is defined as 8kHz.
        rtp_timestamp = (head_pts - self.first_pts) // 2
        return [bytes(pkt) for pkt in encoded], rtp_timestamp

    def pack(self, packet: Packet) -> tuple[list[bytes], int]:
        """
        Wrap an already-encoded packet without re-encoding it.

        :param packet: The packet whose bytes are sent as-is.
        :return: A single-payload list and the packet's pts converted from
            its own time base to `CLOCK_BASE`.
        """
        rtp_timestamp = convert_timebase(packet.pts, packet.time_base, CLOCK_BASE)
        return [bytes(packet)], rtp_timestamp

tests/codecs.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import fractions
2+
from typing import Optional
23

34
from aiortc.codecs import depayload, get_decoder, get_encoder
45
from aiortc.jitterbuffer import JitterFrame
@@ -20,7 +21,7 @@ def assertAudioFrame(
2021
pts: int,
2122
samples: int,
2223
sample_rate: int,
23-
data: bytes,
24+
data: Optional[bytes],
2425
) -> None:
2526
assert isinstance(frame, AudioFrame)
2627
self.assertEqual(frame.format.name, "s16")
@@ -30,8 +31,9 @@ def assertAudioFrame(
3031
self.assertEqual(frame.sample_rate, sample_rate)
3132
self.assertEqual(frame.time_base, fractions.Fraction(1, sample_rate))
3233

33-
plane_data = bytes(frame.planes[0])
34-
self.assertEqual(plane_data[: len(data)], data)
34+
if data is not None:
35+
plane_data = bytes(frame.planes[0])
36+
self.assertEqual(plane_data[: len(data)], data)
3537

3638
def create_audio_frame(
3739
self, samples: int, pts: int, layout: str = "mono", sample_rate: int = 48000
@@ -117,6 +119,7 @@ def roundtrip_audio(
117119
codec: RTCRtpCodecParameters,
118120
output_layout: str,
119121
output_sample_rate: int,
122+
output_clock_rate: Optional[int] = None,
120123
input_layout: str = "mono",
121124
input_sample_rate: int = 8000,
122125
drop: list[int] = [],
@@ -131,7 +134,11 @@ def roundtrip_audio(
131134
layout=input_layout, sample_rate=input_sample_rate, count=10
132135
)
133136

134-
output_sample_count = int(output_sample_rate * AUDIO_PTIME)
137+
# Usually the clock rate matches the sample rate, but not for G722.
138+
if output_clock_rate is not None:
139+
output_frame_pts = int(output_clock_rate * AUDIO_PTIME)
140+
else:
141+
output_frame_pts = int(output_sample_rate * AUDIO_PTIME)
135142

136143
for i, frame in enumerate(input_frames):
137144
# encode
@@ -151,7 +158,7 @@ def roundtrip_audio(
151158
self.assertEqual(frames[0].layout.name, output_layout)
152159
self.assertEqual(frames[0].samples, output_sample_rate * AUDIO_PTIME)
153160
self.assertEqual(frames[0].sample_rate, output_sample_rate)
154-
self.assertEqual(frames[0].pts, i * output_sample_count)
161+
self.assertEqual(frames[0].pts, i * output_frame_pts)
155162
self.assertEqual(
156163
frames[0].time_base, fractions.Fraction(1, output_sample_rate)
157164
)

tests/test_g722.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import sys
2+
3+
from aiortc.codecs import G722_CODEC, get_decoder, get_encoder
4+
from aiortc.codecs.g722 import G722Decoder, G722Encoder
5+
from aiortc.jitterbuffer import JitterFrame
6+
7+
from .codecs import CodecTestCase
8+
9+
# silence
10+
G722_PAYLOAD = b"\xfa" * 160
11+
12+
13+
class G722Test(CodecTestCase):
    """Tests for the G.722 decoder and encoder."""

    def _assert_encodes_16khz(self, layout: str) -> None:
        """Encode ten 16 kHz frames of `layout` and check each output."""
        encoder = get_encoder(G722_CODEC)
        self.assertIsInstance(encoder, G722Encoder)

        for frame in self.create_audio_frames(
            layout=layout, sample_rate=16000, count=10
        ):
            payloads, timestamp = encoder.encode(frame)
            self.assertEqual(len(payloads), 1)
            self.assertEqual(len(payloads[0]), 160)
            # The timestamp is expressed in the 8 kHz clock, the pts in 16 kHz.
            self.assertEqual(timestamp, frame.pts // 2)

    def test_decoder(self) -> None:
        decoder = get_decoder(G722_CODEC)
        self.assertIsInstance(decoder, G722Decoder)

        frames = decoder.decode(JitterFrame(data=G722_PAYLOAD, timestamp=0))
        self.assertEqual(len(frames), 1)
        frame = frames[0]
        # The decoded sample data is not compared (data=None).
        self.assertAudioFrame(
            frame,
            data=None,
            layout="mono",
            pts=0,
            samples=320,
            sample_rate=16000,
        )

    def test_encoder_mono_16khz(self) -> None:
        self._assert_encodes_16khz("mono")

    def test_encoder_stereo_16khz(self) -> None:
        self._assert_encodes_16khz("stereo")

    def test_encoder_stereo_48khz(self) -> None:
        encoder = get_encoder(G722_CODEC)
        self.assertIsInstance(encoder, G722Encoder)

        output = [
            encoder.encode(frame)
            for frame in self.create_audio_frames(
                layout="stereo", sample_rate=48000, count=10
            )
        ]
        self.assertEqual(
            [([len(p) for p in payloads], timestamp) for payloads, timestamp in output],
            [
                ([], None),  # No output due to buffering.
                ([160], 0),
                ([160], 160),
                ([160], 320),
                ([160], 480),
                ([160], 640),
                ([160], 800),
                ([160], 960),
                ([160], 1120),
                ([160], 1280),
            ],
        )

    def test_encoder_pack(self) -> None:
        encoder = get_encoder(G722_CODEC)
        self.assertIsInstance(encoder, G722Encoder)

        packet = self.create_packet(payload=G722_PAYLOAD, pts=1)
        payloads, timestamp = encoder.pack(packet)
        self.assertEqual(payloads, [G722_PAYLOAD])
        self.assertEqual(timestamp, 8)

    def test_roundtrip(self) -> None:
        self.roundtrip_audio(
            G722_CODEC,
            input_sample_rate=16000,
            output_clock_rate=8000,
            output_layout="mono",
            output_sample_rate=16000,
        )

    def test_roundtrip_with_loss(self) -> None:
        self.roundtrip_audio(
            G722_CODEC,
            input_sample_rate=16000,
            output_clock_rate=8000,
            output_layout="mono",
            output_sample_rate=16000,
            drop=[1],
        )

tests/test_rtcpeerconnection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,7 @@ async def _test_connect_audio_bidirectional(
825825
self.assertTrue(
826826
lf2crlf(
827827
"""a=rtpmap:96 opus/48000/2
828+
a=rtpmap:9 G722/8000
828829
a=rtpmap:0 PCMU/8000
829830
a=rtpmap:8 PCMA/8000
830831
"""
@@ -863,6 +864,7 @@ async def _test_connect_audio_bidirectional(
863864
self.assertTrue(
864865
lf2crlf(
865866
"""a=rtpmap:96 opus/48000/2
867+
a=rtpmap:9 G722/8000
866868
a=rtpmap:0 PCMU/8000
867869
a=rtpmap:8 PCMA/8000
868870
"""

0 commit comments

Comments
 (0)