Skip to content

Commit f12706c

Browse files
committed
docs(examples): added example about how to handle Discord's waveform
1 parent 7f2beb7 commit f12706c

File tree

1 file changed

+242
-0
lines changed

1 file changed

+242
-0
lines changed

examples/voice_message_waveform.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
from __future__ import annotations
2+
3+
import base64
4+
import io
5+
from typing import Any
6+
7+
import numpy as np
8+
from PIL import Image, ImageDraw
9+
10+
import discord
11+
12+
13+
class WaveformVisualizer:
    """
    A class to visualize audio waveforms.

    Attributes
    ----------
    waveform_byte_data : numpy.ndarray
        The decoded waveform byte data (one ``uint8`` value per bar).

    Methods
    -------
    decode_waveform(base64_waveform: str) -> np.ndarray[Any, np.dtype[np.uint8]]:
        Decodes the base64 encoded waveform string into a numpy array.

    create_waveform_image(width: int = 500, height: int = 100,
                          background_color: float | tuple[float, ...] | str | None = (0, 0, 0),
                          bar_colors: list[tuple[int, int, int]] | None = None) -> Image.Image:
        Creates a visual representation of the waveform as an image.
    """

    def __init__(self, base64_waveform: str) -> None:
        """
        Initializes the WaveformVisualizer with the provided base64 waveform string.

        Parameters
        ----------
        base64_waveform : str
            A base64 encoded string representing the waveform.
        """
        self.waveform_byte_data: np.ndarray[Any, np.dtype[np.uint8]] = (
            self.decode_waveform(base64_waveform)
        )

    @staticmethod
    def decode_waveform(base64_waveform: str) -> np.ndarray[Any, np.dtype[np.uint8]]:
        """
        Decodes the base64 encoded waveform string into a numpy array.

        Parameters
        ----------
        base64_waveform : str
            The base64 encoded string of the waveform.

        Returns
        -------
        np.ndarray
            A one-dimensional ``uint8`` array containing the decoded waveform bytes.
        """
        return np.frombuffer(base64.b64decode(base64_waveform), dtype=np.uint8)

    def create_waveform_image(
        self,
        width: int = 500,
        height: int = 100,
        background_color: float | tuple[float, ...] | str | None = (0, 0, 0),
        bar_colors: None | list[tuple[int, int, int]] = None,
    ) -> Image.Image:
        """
        Creates a visual representation of the waveform as an image.

        Each waveform byte becomes one vertical bar, mirrored around the
        horizontal centre line of the image.

        Parameters
        ----------
        width : int, optional
            The width of the resulting image, by default 500.
        height : int, optional
            The height of the resulting image, by default 100.
        background_color : float | tuple[float, ...] | str | None, optional
            The background color of the image, by default (0, 0, 0).
        bar_colors : list[tuple[int, int, int]] | None, optional
            Colors cycled through for consecutive bars. ``None`` or an empty
            list selects a built-in palette of three light blue shades.

        Returns
        -------
        Image.Image
            A PIL Image object representing the waveform. When the waveform
            holds no data, the image contains only the background.
        """
        # Fall back to the default palette for both None and an empty list;
        # an empty list would otherwise make the modulo below divide by zero.
        if not bar_colors:
            bar_colors = [(173, 216, 230), (135, 206, 235), (0, 191, 255)]

        # Blank RGB canvas filled with the background color.
        image = Image.new("RGB", (width, height), background_color)

        # Nothing to draw for an empty waveform; returning here also avoids
        # dividing by zero when computing the horizontal scale.
        sample_count = len(self.waveform_byte_data)
        if sample_count == 0:
            return image

        draw = ImageDraw.Draw(image)

        # Each sample owns a horizontal slot of `x_scale` pixels; the bar
        # fills the first half of the slot so the second half acts as a gap.
        x_scale = width / sample_count
        bar_width = x_scale / 2

        # Bars grow both up and down from the centre line, so the largest
        # byte value (255) maps to half of the image height.
        center_y = height / 2
        y_scale = center_y / 255

        for i, value in enumerate(self.waveform_byte_data):
            x1 = i * x_scale

            # Keep at least 2 px so silent samples remain visible.
            bar_height = max(2.0, float(value) * y_scale)

            # Cycle through the palette when there are more bars than colors.
            color = bar_colors[i % len(bar_colors)]

            draw.rectangle(
                [x1, center_y - bar_height, x1 + bar_width, center_y + bar_height],
                fill=color,
            )

        return image
153+
154+
155+
def _build_intents() -> discord.Intents:
    """Return the default intents with the two extra ones this example enables."""
    configured = discord.Intents.default()
    configured.message_content = True  # Enables access to message content
    configured.members = True  # Enables access to server members
    return configured


# Discord bot setup with specific intents (permissions)
intents = _build_intents()
bot = discord.Bot(intents=intents)
161+
162+
163+
@bot.event
async def on_ready() -> None:
    """
    Announce on standard output that the bot is ready.

    Registered through ``bot.event``, so the library invokes it once the bot
    has connected and can start receiving events.
    """
    print("Ready!")
172+
173+
174+
@bot.event
async def on_message(message: discord.Message) -> None:
    """
    React to incoming messages that look like a voice message.

    A message qualifies when it was not sent by the bot itself and carries
    exactly one attachment whose content type is ``audio/ogg`` and whose
    filename is ``voice-message.ogg``. Qualifying messages are passed on to
    ``handle_voice_message``.

    Parameters
    ----------
    message : discord.Message
        The message that was just received.
    """
    # Never react to the bot's own messages.
    if message.author.id == bot.user.id:
        return

    # Only messages with exactly one attachment are considered.
    attachments = message.attachments
    if len(attachments) != 1:
        return

    candidate = attachments[0]
    looks_like_voice_message = (
        candidate.content_type == "audio/ogg"
        and candidate.filename == "voice-message.ogg"
    )
    if not looks_like_voice_message:
        return

    print("We got a voice message!")
    await handle_voice_message(message, candidate)
197+
198+
199+
async def handle_voice_message(
    message: discord.Message, attachment: discord.Attachment
) -> None:
    """
    Handles the processing of voice message attachments.

    Converts the waveform of the voice message to a visual image and sends it
    back in an embed as a reply to the original message. Attachments that
    carry no waveform data are ignored.

    Parameters
    ----------
    message : discord.Message
        The message object containing the voice message.
    attachment : discord.Attachment
        The attachment object representing the voice message.
    """
    # A regular upload can match the filename/content-type filter without
    # being a real voice message; such attachments have no waveform and would
    # make the base64 decoding fail, so skip them.
    waveform = attachment.waveform
    if waveform is None:
        return

    # Create a visual image of the waveform using the WaveformVisualizer class
    image = WaveformVisualizer(waveform).create_waveform_image()

    # Save the image to a byte buffer in PNG format and rewind it so the
    # upload starts reading from the first byte.
    image_buffer = io.BytesIO()
    image.save(image_buffer, format="PNG")
    image_buffer.seek(0)

    # Create a discord.File object from the byte buffer to send as an attachment
    file = discord.File(
        image_buffer, "waveform.png", description="A neat waveform image!"
    )

    # Create an embed to display information and the image
    embed = discord.Embed()
    embed.set_author(
        name=message.author.display_name, icon_url=message.author.display_avatar
    )
    embed.title = "Voice Message"
    embed.add_field(name="Duration", value=str(attachment.duration_secs))
    # The attachment:// URL points the embed at the file uploaded alongside it.
    embed.set_image(url="attachment://waveform.png")
    embed.timestamp = message.created_at

    # Reply to the original message with the embed and attached waveform image
    await message.reply(None, embed=embed, file=file)
239+
240+
241+
# Start the bot; replace the "TOKEN" placeholder with a real bot token.
bot.run("TOKEN")

0 commit comments

Comments
 (0)