Skip to content

Commit cba1bc7

Browse files
committed
Fix base64 audio handling for automatic-speech-recognition
1 parent ff2cf24 commit cba1bc7

File tree

1 file changed

+39
-24
lines changed

1 file changed

+39
-24
lines changed

src/huggingface_inference_toolkit/serialization/json_utils.py

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,49 @@ def default(obj):
1414
raise TypeError
1515

1616

17+
def _is_base64_audio(data):
    """Return True if *data* is a base64 string whose payload looks like audio.

    The string is strictly base64-decoded and the leading bytes are compared
    against the signatures of common audio containers (WAV, MP3, FLAC, OGG
    and MP4/M4A). This is a heuristic on file headers only; the payload is
    not parsed any further.

    Args:
        data: Candidate value. Anything that is not a ``str`` is rejected.

    Returns:
        ``True`` when a known audio signature is found, ``False`` otherwise
        (including when *data* is not valid base64). Never raises.
    """
    if not isinstance(data, str):
        return False

    # Tolerate line-wrapped base64 (e.g. MIME style) by removing whitespace
    # ourselves, then decode strictly. Without ``validate=True`` b64decode
    # silently discards every non-alphabet character, so ordinary text such
    # as "// some comment" decodes to bytes beginning 0xFF 0xFF and would be
    # mistaken for an MP3 frame sync.
    compact = "".join(data.split())
    try:
        decoded = base64.b64decode(compact, validate=True)
    except ValueError:
        # binascii.Error (bad padding / alphabet) subclasses ValueError, and
        # non-ASCII input raises ValueError directly.
        return False

    # WAV: starts with 'RIFF' and contains 'WAVE' in the header.
    if decoded.startswith(b"RIFF") and b"WAVE" in decoded[:20]:
        return True

    # MP3: starts with an 'ID3' tag or with an MPEG frame sync (11 set bits).
    if decoded.startswith(b"ID3"):
        return True
    if len(decoded) > 2 and decoded[0] == 0xFF and (decoded[1] & 0xE0) == 0xE0:
        return True

    # FLAC ('fLaC') and OGG ('OggS') magic numbers.
    if decoded.startswith((b"fLaC", b"OggS")):
        return True

    # M4A/AAC in an MP4 container: the file begins with a 4-byte box size
    # followed by the 'ftyp' box type and the major brand, so the signature
    # lives at offset 4. The offset-0 form is still accepted so that inputs
    # recognised before this fix keep being recognised.
    mp4_signatures = (b"ftypM4A", b"ftypisom", b"ftypmp42")
    if decoded.startswith(mp4_signatures, 4) or decoded.startswith(mp4_signatures):
        return True

    return False
44+
45+
1746
class Jsoner:
    """JSON (de)serialization helpers built on ``orjson``."""

    @staticmethod
    def deserialize(body):
        """Parse *body* as JSON and decode base64 audio found under ``inputs``.

        When the parsed payload is a dict whose ``"inputs"`` value is
        recognised by ``_is_base64_audio``, that value is replaced in place by
        the decoded bytes. Every other payload is returned exactly as parsed.
        """
        payload = orjson.loads(body)

        # Only dict payloads carrying an "inputs" entry are candidates.
        if not isinstance(payload, dict) or "inputs" not in payload:
            return payload
        if not _is_base64_audio(payload["inputs"]):
            return payload

        try:
            payload["inputs"] = base64.b64decode(payload["inputs"])
        except Exception:
            # Best effort: keep the original string if decoding fails.
            pass
        return payload

    @staticmethod
    def serialize(body, accept=None):
        """Dump *body* to JSON bytes with ``orjson``.

        NumPy serialization is enabled and the module-level ``default`` hook
        handles types orjson does not know. ``accept`` is part of the
        signature but is not used here.
        """
        encoded = orjson.dumps(
            body,
            option=orjson.OPT_SERIALIZE_NUMPY,
            default=default,
        )
        return encoded
25-
26-
27-
# class _JSONEncoder(json.JSONEncoder):
28-
# """
29-
# custom `JSONEncoder` to make sure float and int64 are converted
30-
# """
31-
32-
# def default(self, obj):
33-
# if isinstance(obj, np.integer):
34-
# return int(obj)
35-
# elif isinstance(obj, np.floating):
36-
# return float(obj)
37-
# elif isinstance(obj, np.ndarray):
38-
# return obj.tolist()
39-
# elif isinstance(obj, datetime.datetime):
40-
# return obj.__str__()
41-
# elif isinstance(obj, Image.Image):
42-
# with BytesIO() as out:
43-
# obj.save(out, format="PNG")
44-
# png_string = out.getvalue()
45-
# return base64.b64encode(png_string).decode("utf-8")
46-
# else:
47-
# return super(_JSONEncoder, self).default(obj)

0 commit comments

Comments
 (0)