Skip to content

Commit 0ce8377

Browse files
authored
Allow generic inference for ASR for superb (#185)
1 parent eed85ca commit 0ce8377

23 files changed

+73
-820
lines changed

api-inference-community/docker_images/superb/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ LABEL maintainer="Omar [email protected]"
33

44
# Add any system dependency here
55
# RUN apt-get update -y && apt-get install libXXX -y
6+
RUN apt-get update -y
7+
RUN apt-get install git -y
8+
RUN apt-get install libsndfile1 -y
9+
RUN apt-get install ffmpeg -y
610

711
COPY ./requirements.txt /app
812
RUN pip install --no-cache-dir -r requirements.txt
@@ -16,6 +20,7 @@ COPY ./prestart.sh /app/
1620
ARG max_workers=1
1721
ENV MAX_WORKERS=$max_workers
1822
ENV HUGGINGFACE_HUB_CACHE=/data
23+
ENV TORCH_HOME=/data
1924

2025
# Necessary on GPU environment docker.
2126
# TIMEOUT env variable is used by nvcr.io/nvidia/pytorch:xx for another purpose

api-inference-community/docker_images/superb/app/main.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Dict, Type
55

66
from api_inference_community.routes import pipeline_route, status_ok
7-
from app.pipelines import Pipeline
7+
from app.pipelines import AutomaticSpeechRecognitionPipeline, Pipeline
88
from starlette.applications import Starlette
99
from starlette.middleware import Middleware
1010
from starlette.middleware.gzip import GZipMiddleware
@@ -32,7 +32,9 @@
3232
# ALLOWED_TASKS = {"automatic-speech-recognition": AutomaticSpeechRecognitionPipeline}
3333
# You can check the requirements and expectations of each pipeline in their respective
3434
# directories. Implement directly within the directories.
35-
ALLOWED_TASKS: Dict[str, Type[Pipeline]] = {}
35+
ALLOWED_TASKS: Dict[str, Type[Pipeline]] = {
36+
"automatic-speech-recognition": AutomaticSpeechRecognitionPipeline,
37+
}
3638

3739

3840
@functools.lru_cache()
Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,5 @@
11
from app.pipelines.base import Pipeline, PipelineException # isort:skip
22

3-
from app.pipelines.audio_to_audio import AudioToAudioPipeline
43
from app.pipelines.automatic_speech_recognition import (
54
AutomaticSpeechRecognitionPipeline,
65
)
7-
from app.pipelines.feature_extraction import FeatureExtractionPipeline
8-
from app.pipelines.image_classification import ImageClassificationPipeline
9-
from app.pipelines.question_answering import QuestionAnsweringPipeline
10-
from app.pipelines.sentence_similarity import SentenceSimilarityPipeline
11-
from app.pipelines.text_to_speech import TextToSpeechPipeline
12-
from app.pipelines.token_classification import TokenClassificationPipeline

api-inference-community/docker_images/superb/app/pipelines/audio_to_audio.py

Lines changed: 0 additions & 38 deletions
This file was deleted.

api-inference-community/docker_images/superb/app/pipelines/automatic_speech_recognition.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1+
import os
2+
import subprocess
3+
import sys
14
from typing import Dict
25

36
import numpy as np
47
from app.pipelines import Pipeline
8+
from huggingface_hub import snapshot_download
59

610

711
class AutomaticSpeechRecognitionPipeline(Pipeline):
@@ -12,10 +16,24 @@ def __init__(self, model_id: str):
1216
# This function is only called once, so do all the heavy processing I/O here
1317
# IMPLEMENT_THIS : Please define a `self.sampling_rate` for this pipeline
1418
# to automatically read the input correctly
19+
filepath = snapshot_download(model_id)
20+
sys.path.append(filepath)
21+
if "requirements.txt" in os.listdir(filepath):
22+
subprocess.check_call(
23+
[
24+
sys.executable,
25+
"-m",
26+
"pip",
27+
"install",
28+
"-r",
29+
os.path.join(filepath, "requirements.txt"),
30+
]
31+
)
32+
33+
from model import PreTrainedModel
34+
35+
self.model = PreTrainedModel(filepath)
1536
self.sampling_rate = 16000
16-
raise NotImplementedError(
17-
"Please implement AutomaticSpeechRecognitionPipeline __init__ function"
18-
)
1937

2038
def __call__(self, inputs: np.array) -> Dict[str, str]:
2139
"""
@@ -28,7 +46,4 @@ def __call__(self, inputs: np.array) -> Dict[str, str]:
2846
A :obj:`dict`:. The object returned should be like {"text": "XXX"} containing
2947
the detected language from the input audio
3048
"""
31-
# IMPLEMENT_THIS
32-
raise NotImplementedError(
33-
"Please implement AutomaticSpeechRecognitionPipeline __call__ function"
34-
)
49+
return self.model(inputs)

api-inference-community/docker_images/superb/app/pipelines/feature_extraction.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

api-inference-community/docker_images/superb/app/pipelines/image_classification.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

api-inference-community/docker_images/superb/app/pipelines/question_answering.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

api-inference-community/docker_images/superb/app/pipelines/sentence_similarity.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

api-inference-community/docker_images/superb/app/pipelines/text_to_speech.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

0 commit comments

Comments
 (0)