Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions duui-video-to-audio/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.idea/
target/
venv/
*.mp4
3 changes: 3 additions & 0 deletions duui-video-to-audio/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.idea/
target/
venv*/
13 changes: 13 additions & 0 deletions duui-video-to-audio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#### Video2Audio component for DUUI

Uses ffmpeg-python package to convert video to audio.

#### Input/Output:

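input: video (base64-encoded string, read from the CAS sofa data)

output: audio (base64-encoded MP3, written back as sofa data with MIME type `audio/mp3`)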

#### Parameter:

none
155 changes: 155 additions & 0 deletions duui-video-to-audio/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.texttechnologylab.duui</groupId>
<!-- Artifact id follows this module's directory name (duui-video-to-audio) -->
<artifactId>duui_video_to_audio</artifactId>
<version>0.1.0</version>

<licenses>
<license>
<name>AGPL-3.0-or-later</name>
<url>https://www.gnu.org/licenses/agpl.txt</url>
<distribution>repo</distribution>
<comments>GNU Affero General Public License v3.0 or later</comments>
</license>
</licenses>

<organization>
<name>Texttechnology Lab</name>
<url>https://www.texttechnologylab.org</url>
</organization>
<developers>
<developer>
<id>mehler</id>
<name>Prof. Dr. Alexander Mehler</name>
<email>[email protected]</email>
<!-- Profile URL uses the same /team/<first>-<last>/ scheme as the other developer entry -->
<url>https://www.texttechnologylab.org/team/alexander-mehler/</url>
<organization>Goethe University Frankfurt / Texttechnology Lab</organization>
<organizationUrl>https://www.texttechnologylab.org</organizationUrl>
<roles>
<role>head of department</role>
</roles>
</developer>
<developer>
<id>aabusale</id>
<name>Ali Abusaleh</name>
<email>[email protected]</email>
<url>https://www.texttechnologylab.org/team/ali-abusaleh/</url>
<organization>Goethe University Frankfurt / Texttechnology Lab</organization>
<organizationUrl>https://www.texttechnologylab.org</organizationUrl>
<roles>
<role>Research assistant</role>
</roles>
<timezone>Europe/Berlin</timezone>
</developer>
</developers>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.0</version>
<configuration>
<argLine>
--illegal-access=permit
--add-opens java.base/java.util=ALL-UNNAMED
<!-- add-opens for use in JUnit-Tests...-->
</argLine>
</configuration>
</plugin>
</plugins>
</build>


<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<dkpro.core.version>2.4.0</dkpro.core.version>
<!-- <ttlab.duui.version>f68ca579ab553074f76d061623dc9b00cf508276</ttlab.duui.version>-->
<!-- <ttlab.typesystem.version>033beaa593a99c005400f4021ea8d6fa8957e6c3</ttlab.typesystem.version>-->
</properties>

<repositories>
<repository>
<id>jitpack.io</id>
<url>https://jitpack.io</url>
</repository>
</repositories>

<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-asl</artifactId>
<version>${dkpro.core.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>

<dependencies>
<!--<dependency>
<groupId>com.github.texttechnologylab</groupId>
<artifactId>DockerUnifiedUIMAInterface</artifactId>
<version>${ttlab.duui.version}</version>
</dependency>-->
<dependency>
<groupId>com.github.texttechnologylab</groupId>
<artifactId>DockerUnifiedUIMAInterface</artifactId>
<version>1.4</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>com.github.texttechnologylab.textimager-uima</groupId>-->
<!-- <artifactId>textimager-uima-util</artifactId>-->
<!-- <version>${ttlab.textimager.typesystem.version}</version>-->
<!-- </dependency>-->

<dependency>
<groupId>com.github.texttechnologylab</groupId>
<artifactId>UIMATypeSystem</artifactId>
<version>3.0.5</version>
</dependency>

<!-- <dependency>-->
<!-- <groupId>org.texttechnologylab.annotation</groupId>-->
<!-- <artifactId>typesystem</artifactId>-->
<!-- <version>3.0.1</version>-->
<!-- </dependency>-->

<!-- <dependency>-->
<!-- <groupId>org.texttechnologylab</groupId>-->
<!-- <artifactId>DockerUnifiedUIMAInterface</artifactId>-->
<!-- <version>1.3</version>-->
<!-- </dependency>-->

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.9.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-segmentation-asl</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-io-xmi-asl</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-resources-asl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
6 changes: 6 additions & 0 deletions duui-video-to-audio/python/TypeSystem.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
<types>

</types>
</typeSystemDescription>
122 changes: 122 additions & 0 deletions duui-video-to-audio/python/duui-video-to-audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import base64
import os
import sys
import tempfile
import warnings
from functools import lru_cache
from io import BytesIO
from time import time
from typing import List, Optional, Dict, Union

import ffmpeg
import uvicorn
from cassis import load_typesystem
from fastapi import FastAPI, HTTPException, Response
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from starlette.responses import PlainTextResponse, JSONResponse


def convert_to_mp3(video_path):
    """Extract the audio of a video file as MP3 and return it base64-encoded.

    The input file is consumed: it is deleted before this function returns,
    whether or not the conversion succeeded.

    Args:
        video_path: Path of the video file to convert.

    Returns:
        The MP3 bytes produced by ffmpeg, base64-encoded and decoded to a
        UTF-8 ``str``.

    Raises:
        Any exception raised by the ffmpeg call or by reading the result
        propagates to the caller unchanged.
    """
    try:
        # A private scratch directory gives every call its own output path.
        # A fixed file name in the working directory is shared between
        # concurrent requests, and a stale file left by a failed run would
        # make ffmpeg stop at its "overwrite?" prompt.
        with tempfile.TemporaryDirectory() as scratch_dir:
            audio_path = os.path.join(scratch_dir, "tempAudio.mp3")

            # Use ffmpeg to extract audio from the video
            ffmpeg.input(video_path).output(audio_path).run()

            # Read the audio file and convert it to base64; the scratch
            # directory (and the MP3 in it) is removed when the block exits.
            with open(audio_path, "rb") as audio_file:
                return base64.b64encode(audio_file.read()).decode('utf-8')
    finally:
        # Remove the input even when ffmpeg failed; the existence check keeps
        # a missing input from masking the original error.
        if os.path.exists(video_path):
            os.remove(video_path)


class DUUIRequest(BaseModel):
    """Request body accepted by the /v1/process endpoint."""
    # Video content as a base64-encoded string (the endpoint base64-decodes it)
    video: str

class DUUIResponse(BaseModel):
    """Response body returned by the /v1/process endpoint."""
    # Extracted audio as a base64-encoded string
    audio: str
    # MIME type of the audio; the endpoint sets it to "audio/mp3"
    mimetype: str

# FastAPI application object for the component. The OpenAPI schema is exposed
# at /openapi.json, the docs URL is /api and the ReDoc URL is disabled.
app = FastAPI(
    title="Video2Audio",
    description="Video To Audio Component",
    version="1.0",
    openapi_url="/openapi.json",
    docs_url="/api",
    redoc_url=None,
    terms_of_service="https://www.texttechnologylab.org/legal_notice/",
    contact=dict(
        name="Peter Kannewitz",
        email="[email protected]",
    ),
    license_info=dict(
        name="AGPL",
        url="http://www.gnu.org/licenses/agpl-3.0.en.html",
    ),
)

lua_communication_script_filename = "duui_video_to_audio.lua"
typesystem_filename = 'TypeSystem.xml'

# Both resources are looked up next to this script instead of in the current
# working directory, so the service also starts when launched from elsewhere.
_resource_dir = os.path.dirname(os.path.abspath(__file__))

# Lua communication script, read once at import time and kept as text
with open(os.path.join(_resource_dir, lua_communication_script_filename), 'rb') as f:
    lua_communication_script = f.read().decode("utf-8")

# Type system description, parsed once at import time with cassis
with open(os.path.join(_resource_dir, typesystem_filename), 'rb') as f:
    typesystem = load_typesystem(f)


@app.get("/v1/typesystem")
def get_typesystem() -> Response:
    """Serve the type system loaded at import time as a UTF-8 XML document."""
    # TODO remove cassis dependency, as only needed for typesystem at the moment?
    return Response(
        content=typesystem.to_xml().encode("utf-8"),
        media_type="application/xml",
    )

# Return Lua communication script
@app.get("/v1/communication_layer", response_class=PlainTextResponse)
def get_communication_layer() -> str:
    """Return the Lua communication script read at import time, as plain text."""
    return lua_communication_script

# Process request from DUUI
@app.post("/v1/process")
def post_process(request: DUUIRequest):
    """Convert the base64-encoded video of a request into base64-encoded MP3.

    Args:
        request: Request body whose ``video`` field holds the base64-encoded
            video.

    Returns:
        A ``DUUIResponse`` with the base64-encoded audio and the MIME type
        ``"audio/mp3"``.

    Raises:
        HTTPException: 400 when ``video`` cannot be base64-decoded, 500 when
            writing or converting the video fails. Failures used to be printed
            and answered with an empty (``null``) success response.
    """
    try:
        video_bytes = base64.b64decode(request.video)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        raise HTTPException(
            status_code=400,
            detail=f"Field 'video' is not valid base64: {exc}",
        ) from exc

    # Each request gets its own temporary file; a fixed name such as
    # "tempVideo.mp4" would be shared by requests that are handled in parallel.
    fd, video_path = tempfile.mkstemp(suffix=".mp4")
    try:
        with os.fdopen(fd, "wb") as video_file:
            video_file.write(video_bytes)

        # Extract the audio and get it back base64-encoded. The payload is
        # intentionally not printed: it can be many megabytes of base64.
        audio_base64 = convert_to_mp3(video_path)
    except Exception as exc:
        print(str(exc))
        raise HTTPException(
            status_code=500,
            detail=f"Video to audio conversion failed: {exc}",
        ) from exc
    finally:
        # The converter may already have deleted the input file.
        if os.path.exists(video_path):
            os.remove(video_path)

    return DUUIResponse(
        audio=audio_base64,
        mimetype="audio/mp3"
    )


if __name__ == "__main__":
    # Start uvicorn for this module's app on 0.0.0.0:9714 with one worker.
    uvicorn.run(
        "duui-video-to-audio:app",
        host="0.0.0.0",
        port=9714,
        workers=1,
    )
39 changes: 39 additions & 0 deletions duui-video-to-audio/python/duui_video_to_audio.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
-- Bind static classes from java
-- StandardCharsets is used by deserialize to decode the response bytes as UTF-8
StandardCharsets = luajava.bindClass("java.nio.charset.StandardCharsets")
-- NOTE(review): `util` is not referenced by any function visible in this script; confirm before removing
util = luajava.bindClass("org.apache.uima.fit.util.JCasUtil")

-- This "serialize" function is called to transform the CAS object into a stream that is sent to the annotator
-- Inputs:
--  - inputCas: The actual CAS object to serialize
--  - outputStream: Stream that is sent to the annotator, can be e.g. a string, JSON payload, ...
--  - params: not used by this component
function serialize(inputCas, outputStream, params)
    print("Start serialize")

    -- Get data from CAS: the sofa string is forwarded unchanged as the "video" field.
    -- NOTE(review): the annotator base64-decodes this field, so the sofa presumably
    -- holds a base64-encoded video; confirm against the pipeline that fills the CAS.
    local videoBase64 = inputCas:getSofaDataString() --inputCas:getView(audioView):getSofaDataString()

    -- Encode data as JSON object and write to stream.
    -- The payload is deliberately not printed (it is the whole encoded video), and no
    -- "language" field is sent: it was read from an undefined global and therefore
    -- never part of the encoded object.
    outputStream:write(json.encode({
        video = videoBase64
    }))
end

-- This "deserialize" function is called on receiving the results from the annotator that have to be transformed into a CAS object
-- Inputs:
--  - inputCas: The actual CAS object to deserialize into
--  - inputStream: Stream that is received from the annotator, can be e.g. a string, JSON payload, ...
-- Raises an error when the response does not decode to a JSON object.
function deserialize(inputCas, inputStream)
    -- Get string from stream, assume UTF-8 encoding
    local inputString = luajava.newInstance("java.lang.String", inputStream:readAllBytes(), StandardCharsets.UTF_8)

    -- Parse JSON data from string into object
    local results = json.decode(inputString)

    -- Fail with a readable message instead of "attempt to index a nil value"
    -- when the annotator answered with null or another non-object value.
    if type(results) ~= "table" then
        error("duui-video-to-audio: annotator response is not a JSON object")
    end

    -- Store the returned audio as sofa data, using the MIME type reported by the
    -- annotator and falling back to "audio/mp3" when it is absent.
    -- NOTE(review): serialize reads the sofa string of the CAS it is given; UIMA
    -- normally rejects setting sofa data a second time on the same view, so
    -- confirm the sofa of inputCas is still unset at this point.
    local audio = results["audio"]
    if audio ~= nil then
        inputCas:setSofaDataString(audio, results["mimetype"] or "audio/mp3")
    end
end
9 changes: 9 additions & 0 deletions duui-video-to-audio/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# NOTE(review): scipy, protobuf, six and peft are not imported by the service code in this change; confirm they are needed
scipy==1.13.1
protobuf==4.25.3
fastapi==0.110.0
dkpro-cassis==0.9.1
uvicorn[standard]==0.27.1
pydantic-settings==2.0.2
six==1.16.0
peft==0.10.0
# Pinned like every other requirement so image builds are reproducible
ffmpeg-python==0.2.0
Loading