Skip to content

Commit f74cb9f

Browse files
committed
Add support for G.722 audio
The G.722 codec is "special" because the clock rate is 8kHz even though the sampling rate is 16kHz.
1 parent eeb29b7 commit f74cb9f

File tree

8 files changed

+209
-45
lines changed

8 files changed

+209
-45
lines changed

examples/server/index.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ <h2>Options</h2>
4848
<select id="audio-codec">
4949
<option value="default" selected>Default codecs</option>
5050
<option value="opus/48000/2">Opus</option>
51+
<option value="G722/8000">G722</option>
5152
<option value="PCMU/8000">PCMU</option>
5253
<option value="PCMA/8000">PCMA</option>
5354
</select>

src/aiortc/codecs/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,16 @@
1111
)
1212
from .base import Decoder, Encoder
1313
from .g711 import PcmaDecoder, PcmaEncoder, PcmuDecoder, PcmuEncoder
14+
from .g722 import G722Decoder, G722Encoder
1415
from .h264 import H264Decoder, H264Encoder, h264_depayload
1516
from .opus import OpusDecoder, OpusEncoder
1617
from .vpx import Vp8Decoder, Vp8Encoder, vp8_depayload
1718

19+
# The RTP clock rate for G.722 is 8kHz even though the sampling rate is 16kHz.
20+
# See https://datatracker.ietf.org/doc/html/rfc3551#section-4.5.2
21+
G722_CODEC = RTCRtpCodecParameters(
22+
mimeType="audio/G722", clockRate=8000, channels=1, payloadType=9
23+
)
1824
PCMU_CODEC = RTCRtpCodecParameters(
1925
mimeType="audio/PCMU", clockRate=8000, channels=1, payloadType=0
2026
)
@@ -27,6 +33,7 @@
2733
RTCRtpCodecParameters(
2834
mimeType="audio/opus", clockRate=48000, channels=2, payloadType=96
2935
),
36+
G722_CODEC,
3037
PCMU_CODEC,
3138
PCMA_CODEC,
3239
],
@@ -141,7 +148,9 @@ def get_capabilities(kind: str) -> RTCRtpCapabilities:
141148
def get_decoder(codec: RTCRtpCodecParameters) -> Decoder:
142149
mimeType = codec.mimeType.lower()
143150

144-
if mimeType == "audio/opus":
151+
if mimeType == "audio/g722":
152+
return G722Decoder()
153+
elif mimeType == "audio/opus":
145154
return OpusDecoder()
146155
elif mimeType == "audio/pcma":
147156
return PcmaDecoder()
@@ -158,7 +167,9 @@ def get_decoder(codec: RTCRtpCodecParameters) -> Decoder:
158167
def get_encoder(codec: RTCRtpCodecParameters) -> Encoder:
159168
mimeType = codec.mimeType.lower()
160169

161-
if mimeType == "audio/opus":
170+
if mimeType == "audio/g722":
171+
return G722Encoder()
172+
elif mimeType == "audio/opus":
162173
return OpusEncoder()
163174
elif mimeType == "audio/pcma":
164175
return PcmaEncoder()

src/aiortc/codecs/g722.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import fractions
2+
from typing import Optional, cast
3+
4+
from av import AudioCodecContext, AudioFrame, AudioResampler, CodecContext
5+
from av.frame import Frame
6+
from av.packet import Packet
7+
8+
from ..jitterbuffer import JitterFrame
9+
from ..mediastreams import convert_timebase
10+
from .base import Decoder, Encoder
11+
12+
SAMPLE_RATE = 16000
13+
SAMPLE_WIDTH = 2
14+
SAMPLES_PER_FRAME = 320
15+
TIME_BASE = fractions.Fraction(1, 16000)
16+
17+
# Even though the sample rate is 16kHz, the clockrate is defined as 8kHz.
18+
# This is why timestamps are multiplied by 2 when decoding and divided by 2 when encoding.
19+
CLOCK_BASE = fractions.Fraction(1, 8000)
20+
21+
22+
class G722Decoder(Decoder):
    """Decoder turning G.722 payloads into signed 16-bit mono audio frames.

    The underlying codec context is configured for ``SAMPLE_RATE`` (16 kHz)
    output, while incoming timestamps are expressed on the 8 kHz RTP clock.
    """

    def __init__(self) -> None:
        """Create and configure the "g722" decoding context."""
        context = CodecContext.create("g722", "r")
        self.codec = cast(AudioCodecContext, context)
        self.codec.format = "s16"
        self.codec.layout = "mono"
        self.codec.sample_rate = SAMPLE_RATE

    def decode(self, encoded_frame: JitterFrame) -> list[Frame]:
        """Decode one jitter-buffer frame.

        :param encoded_frame: Frame carrying the G.722 payload in ``data`` and
            its timestamp in ``timestamp``.
        :return: Whatever frames the codec context produces for this payload.
        """
        rtp_packet = Packet(encoded_frame.data)
        # The incoming timestamp ticks at 8 kHz; the packet time base is
        # 1/16000, so the value is doubled.
        rtp_packet.pts = 2 * encoded_frame.timestamp
        rtp_packet.time_base = TIME_BASE
        decoded = self.codec.decode(rtp_packet)
        return cast(list[Frame], decoded)
34+
35+
36+
class G722Encoder(Encoder):
    """Encoder producing G.722 payloads from signed 16-bit audio frames.

    Input frames are first passed through a resampler targeting mono audio
    at ``SAMPLE_RATE`` in chunks of ``SAMPLES_PER_FRAME`` samples, then handed
    to the "g722" codec context. Returned timestamps are halved so that they
    tick at the 8 kHz clock rate rather than the 16 kHz sample rate.
    """

    def __init__(self) -> None:
        """Set up the encoding context, timestamp origin and resampler."""
        self.codec = cast(AudioCodecContext, CodecContext.create("g722", "w"))
        self.codec.format = "s16"
        self.codec.layout = "mono"
        self.codec.sample_rate = SAMPLE_RATE
        self.codec.time_base = TIME_BASE

        # PTS of the very first packet emitted; later timestamps are
        # reported relative to it.
        self.first_pts: Optional[int] = None

        # A dedicated resampler lets us dictate the frame size fed to the codec.
        self.resampler = AudioResampler(
            format="s16",
            layout="mono",
            rate=SAMPLE_RATE,
            frame_size=SAMPLES_PER_FRAME,
        )

    def encode(
        self, frame: Frame, force_keyframe: bool = False
    ) -> tuple[list[bytes], int]:
        """Resample and encode a single audio frame.

        :param frame: An ``AudioFrame`` in "s16" format with a "mono" or
            "stereo" layout.
        :param force_keyframe: Not used by this encoder.
        :return: The encoded payloads and the timestamp of the first one,
            relative to the first packet ever produced and divided by 2.
            When the resampler/codec yields nothing for this frame, the
            result is ``([], None)`` despite the ``int`` annotation.
        """
        assert isinstance(frame, AudioFrame)
        assert frame.format.name == "s16"
        assert frame.layout.name in ["mono", "stereo"]

        # Feed every resampled chunk to the codec and gather its output.
        packets = []
        for resampled in self.resampler.resample(frame):
            packets.extend(self.codec.encode(resampled))

        if not packets:
            # Nothing came out yet because of buffering.
            return [], None

        head_pts = packets[0].pts
        if self.first_pts is None:
            self.first_pts = head_pts
        timestamp = (head_pts - self.first_pts) // 2
        return [bytes(packet) for packet in packets], timestamp

    def pack(self, packet: Packet) -> tuple[list[bytes], int]:
        """Wrap an already-encoded packet without re-encoding it.

        :param packet: Packet whose ``pts`` / ``time_base`` are converted to
            ``CLOCK_BASE`` units.
        :return: A single-element payload list and the converted timestamp.
        """
        timestamp = convert_timebase(packet.pts, packet.time_base, CLOCK_BASE)
        payload = bytes(packet)
        return [payload], timestamp

tests/codecs.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import fractions
2+
from typing import Optional
23

34
from aiortc.codecs import depayload, get_decoder, get_encoder
45
from aiortc.jitterbuffer import JitterFrame
@@ -20,7 +21,7 @@ def assertAudioFrame(
2021
pts: int,
2122
samples: int,
2223
sample_rate: int,
23-
data: bytes,
24+
data: Optional[bytes],
2425
) -> None:
2526
assert isinstance(frame, AudioFrame)
2627
self.assertEqual(frame.format.name, "s16")
@@ -30,8 +31,9 @@ def assertAudioFrame(
3031
self.assertEqual(frame.sample_rate, sample_rate)
3132
self.assertEqual(frame.time_base, fractions.Fraction(1, sample_rate))
3233

33-
plane_data = bytes(frame.planes[0])
34-
self.assertEqual(plane_data[: len(data)], data)
34+
if data is not None:
35+
plane_data = bytes(frame.planes[0])
36+
self.assertEqual(plane_data[: len(data)], data)
3537

3638
def create_audio_frame(
3739
self, samples: int, pts: int, layout: str = "mono", sample_rate: int = 48000
@@ -115,10 +117,8 @@ def create_video_frames(
115117
def roundtrip_audio(
116118
self,
117119
codec: RTCRtpCodecParameters,
118-
output_layout: str,
119-
output_sample_rate: int,
120-
input_layout: str = "mono",
121-
input_sample_rate: int = 8000,
120+
layout: str,
121+
sample_rate: int,
122122
drop: list[int] = [],
123123
) -> None:
124124
"""
@@ -127,15 +127,16 @@ def roundtrip_audio(
127127
encoder = get_encoder(codec)
128128
decoder = get_decoder(codec)
129129

130+
samples = int(sample_rate * AUDIO_PTIME)
131+
time_base = fractions.Fraction(1, sample_rate)
132+
130133
input_frames = self.create_audio_frames(
131-
layout=input_layout, sample_rate=input_sample_rate, count=10
134+
layout=layout, sample_rate=sample_rate, count=10
132135
)
133-
134-
output_sample_count = int(output_sample_rate * AUDIO_PTIME)
135-
136136
for i, frame in enumerate(input_frames):
137137
# encode
138138
packages, timestamp = encoder.encode(frame)
139+
self.assertEqual(timestamp, i * codec.clockRate * AUDIO_PTIME)
139140

140141
if i not in drop:
141142
# depacketize
@@ -148,13 +149,11 @@ def roundtrip_audio(
148149
self.assertEqual(len(frames), 1)
149150
assert isinstance(frames[0], AudioFrame)
150151
self.assertEqual(frames[0].format.name, "s16")
151-
self.assertEqual(frames[0].layout.name, output_layout)
152-
self.assertEqual(frames[0].samples, output_sample_rate * AUDIO_PTIME)
153-
self.assertEqual(frames[0].sample_rate, output_sample_rate)
154-
self.assertEqual(frames[0].pts, i * output_sample_count)
155-
self.assertEqual(
156-
frames[0].time_base, fractions.Fraction(1, output_sample_rate)
157-
)
152+
self.assertEqual(frames[0].layout.name, layout)
153+
self.assertEqual(frames[0].samples, samples)
154+
self.assertEqual(frames[0].sample_rate, sample_rate)
155+
self.assertEqual(frames[0].pts, i * samples)
156+
self.assertEqual(frames[0].time_base, time_base)
158157

159158
def roundtrip_video(
160159
self,

tests/test_g711.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def test_decoder(self) -> None:
2828
sample_rate=8000,
2929
)
3030

31-
def test_encoder_mono_8hz(self) -> None:
31+
def test_encoder_mono_8khz(self) -> None:
3232
encoder = get_encoder(PCMA_CODEC)
3333
self.assertIsInstance(encoder, PcmaEncoder)
3434

@@ -86,12 +86,10 @@ def test_encoder_pack(self) -> None:
8686
self.assertEqual(timestamp, 8)
8787

8888
def test_roundtrip(self) -> None:
89-
self.roundtrip_audio(PCMA_CODEC, output_layout="mono", output_sample_rate=8000)
89+
self.roundtrip_audio(PCMA_CODEC, layout="mono", sample_rate=8000)
9090

9191
def test_roundtrip_with_loss(self) -> None:
92-
self.roundtrip_audio(
93-
PCMA_CODEC, output_layout="mono", output_sample_rate=8000, drop=[1]
94-
)
92+
self.roundtrip_audio(PCMA_CODEC, layout="mono", sample_rate=8000, drop=[1])
9593

9694

9795
class PcmuTest(CodecTestCase):
@@ -111,7 +109,7 @@ def test_decoder(self) -> None:
111109
sample_rate=8000,
112110
)
113111

114-
def test_encoder_mono_8hz(self) -> None:
112+
def test_encoder_mono_8khz(self) -> None:
115113
encoder = get_encoder(PCMU_CODEC)
116114
self.assertIsInstance(encoder, PcmuEncoder)
117115

@@ -160,9 +158,7 @@ def test_encoder_stereo_48khz(self) -> None:
160158
)
161159

162160
def test_roundtrip(self) -> None:
163-
self.roundtrip_audio(PCMU_CODEC, output_layout="mono", output_sample_rate=8000)
161+
self.roundtrip_audio(PCMU_CODEC, layout="mono", sample_rate=8000)
164162

165163
def test_roundtrip_with_loss(self) -> None:
166-
self.roundtrip_audio(
167-
PCMU_CODEC, output_layout="mono", output_sample_rate=8000, drop=[1]
168-
)
164+
self.roundtrip_audio(PCMU_CODEC, layout="mono", sample_rate=8000, drop=[1])

tests/test_g722.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from aiortc.codecs import G722_CODEC, get_decoder, get_encoder
2+
from aiortc.codecs.g722 import G722Decoder, G722Encoder
3+
from aiortc.jitterbuffer import JitterFrame
4+
5+
from .codecs import CodecTestCase
6+
7+
# silence
8+
G722_PAYLOAD = b"\xfa" * 160
9+
10+
11+
class G722Test(CodecTestCase):
    """Tests for the G.722 decoder and encoder returned for ``G722_CODEC``."""

    def _assert_encodes_16khz(self, layout: str) -> None:
        """Encode ten 16 kHz frames of the given layout and check each result.

        Every frame must yield exactly one 160-byte payload whose timestamp
        is half of the input frame's pts.
        """
        encoder = get_encoder(G722_CODEC)
        self.assertIsInstance(encoder, G722Encoder)

        frames = self.create_audio_frames(layout=layout, sample_rate=16000, count=10)
        for frame in frames:
            payloads, timestamp = encoder.encode(frame)
            self.assertEqual(len(payloads), 1)
            self.assertEqual(len(payloads[0]), 160)
            self.assertEqual(timestamp, frame.pts // 2)

    def test_decoder(self) -> None:
        """A 160-byte payload decodes to one 320-sample 16 kHz mono frame."""
        decoder = get_decoder(G722_CODEC)
        self.assertIsInstance(decoder, G722Decoder)

        jitter_frame = JitterFrame(data=G722_PAYLOAD, timestamp=0)
        frames = decoder.decode(jitter_frame)
        self.assertEqual(len(frames), 1)
        # The decoded sample data is not compared (data=None).
        self.assertAudioFrame(
            frames[0],
            data=None,
            layout="mono",
            pts=0,
            samples=320,
            sample_rate=16000,
        )

    def test_encoder_mono_16khz(self) -> None:
        """Mono 16 kHz input produces one payload per frame."""
        self._assert_encodes_16khz("mono")

    def test_encoder_stereo_16khz(self) -> None:
        """Stereo 16 kHz input produces one payload per frame."""
        self._assert_encodes_16khz("stereo")

    def test_encoder_stereo_48khz(self) -> None:
        """Stereo 48 kHz input is buffered for one frame, then flows steadily."""
        encoder = get_encoder(G722_CODEC)
        self.assertIsInstance(encoder, G722Encoder)

        summary = []
        for frame in self.create_audio_frames(
            layout="stereo", sample_rate=48000, count=10
        ):
            payloads, timestamp = encoder.encode(frame)
            summary.append(([len(p) for p in payloads], timestamp))

        # First call yields nothing due to buffering; the nine following
        # calls yield 160-byte payloads at timestamps 0, 160, ..., 1280.
        expected = [([], None)]
        expected += [([160], 160 * index) for index in range(9)]
        self.assertEqual(summary, expected)

    def test_encoder_pack(self) -> None:
        """Packing passes the payload through and rescales the timestamp."""
        encoder = get_encoder(G722_CODEC)
        self.assertTrue(isinstance(encoder, G722Encoder))

        packet = self.create_packet(payload=G722_PAYLOAD, pts=1)
        payloads, timestamp = encoder.pack(packet)
        self.assertEqual(payloads, [G722_PAYLOAD])
        self.assertEqual(timestamp, 8)

    def test_roundtrip(self) -> None:
        """Encode then decode mono 16 kHz audio without loss."""
        self.roundtrip_audio(G722_CODEC, layout="mono", sample_rate=16000)

    def test_roundtrip_with_loss(self) -> None:
        """Encode then decode mono 16 kHz audio with frame 1 dropped."""
        self.roundtrip_audio(G722_CODEC, layout="mono", sample_rate=16000, drop=[1])

tests/test_opus.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -91,20 +91,7 @@ def test_encoder_pack(self) -> None:
9191
self.assertEqual(timestamp, 48)
9292

9393
def test_roundtrip(self) -> None:
94-
self.roundtrip_audio(
95-
OPUS_CODEC,
96-
input_layout="stereo",
97-
input_sample_rate=48000,
98-
output_layout="stereo",
99-
output_sample_rate=48000,
100-
)
94+
self.roundtrip_audio(OPUS_CODEC, layout="stereo", sample_rate=48000)
10195

10296
def test_roundtrip_with_loss(self) -> None:
103-
self.roundtrip_audio(
104-
OPUS_CODEC,
105-
input_layout="stereo",
106-
input_sample_rate=48000,
107-
output_layout="stereo",
108-
output_sample_rate=48000,
109-
drop=[1],
110-
)
97+
self.roundtrip_audio(OPUS_CODEC, layout="stereo", sample_rate=48000, drop=[1])

tests/test_rtcpeerconnection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,7 @@ async def _test_connect_audio_bidirectional(
825825
self.assertTrue(
826826
lf2crlf(
827827
"""a=rtpmap:96 opus/48000/2
828+
a=rtpmap:9 G722/8000
828829
a=rtpmap:0 PCMU/8000
829830
a=rtpmap:8 PCMA/8000
830831
"""
@@ -863,6 +864,7 @@ async def _test_connect_audio_bidirectional(
863864
self.assertTrue(
864865
lf2crlf(
865866
"""a=rtpmap:96 opus/48000/2
867+
a=rtpmap:9 G722/8000
866868
a=rtpmap:0 PCMU/8000
867869
a=rtpmap:8 PCMA/8000
868870
"""

0 commit comments

Comments
 (0)