import torch
import torchaudio
import whisper
from request import ModelRequest
import tempfile
import os

class Model:
    def __new__(cls, context):
        cls.context = context
        if not hasattr(cls, 'instance'):
            # Create the singleton instance and load Whisper only once
            cls.instance = super(Model, cls).__new__(cls)

            # Load the Whisper model and move it to the GPU when available
            cls.model = whisper.load_model("base")
            cls.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            cls.model.to(cls.device)
        return cls.instance

    def trim_audio(self, audio_path, n_seconds):
        audio, sr = torchaudio.load(audio_path)
        total_duration = audio.shape[1] / sr  # Total duration of the audio in seconds

        # If the audio duration is less than n_seconds, don't trim the audio
        if total_duration < n_seconds:
            print(f"The audio duration ({total_duration:.2f}s) is less than {n_seconds}s. Using the full audio.")
            return audio, sr

        # Keep only the first n_seconds worth of samples
        num_samples = int(n_seconds * sr)
        audio = audio[:, :num_samples]
        return audio, sr

    async def inference(self, request: ModelRequest):
        # n_seconds is accessed from the request object
        n_seconds = request.n_seconds
        trimmed_audio, sr = self.trim_audio(request.wav_file, n_seconds)

        # Create a temporary .wav path, closing the handle before writing so the
        # file can be reopened by name on all platforms
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_path = temp_file.name

        try:
            # Save the trimmed audio, then reload it with Whisper's own helpers
            torchaudio.save(temp_path, trimmed_audio, sr)
            audio = whisper.load_audio(temp_path)
            audio = whisper.pad_or_trim(audio)
        finally:
            # Clean up the temporary file even if loading fails
            os.unlink(temp_path)

        mel = whisper.log_mel_spectrogram(audio).to(self.device)

        # Detect the spoken language and return the most probable one
        _, probs = self.model.detect_language(mel)
        detected_language = max(probs, key=probs.get)

        return detected_language
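
For reference, a minimal sketch of how this class might be driven end to end. It assumes ModelRequest can be constructed with the wav_file and n_seconds fields that inference reads above; the module name, the context argument, and the sample path are placeholder assumptions, not part of this commit:

    import asyncio

    from model import Model          # assumed module name for this file
    from request import ModelRequest

    # Placeholder context; Model.__new__ only stores it on the class.
    model = Model(context=None)

    # Assumed constructor: wav_file and n_seconds are the fields that
    # Model.inference reads from the request.
    req = ModelRequest(wav_file="sample.wav", n_seconds=30)

    # inference is a coroutine, so run it on an event loop.
    detected = asyncio.run(model.inference(req))
    print(f"Detected language: {detected}")

Because __new__ caches the loaded model on the class, repeated Model(...) calls reuse the same Whisper instance rather than reloading weights per request.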