diff --git a/duui-video-to-audio/.dockerignore b/duui-video-to-audio/.dockerignore
new file mode 100644
index 00000000..9d9808c2
--- /dev/null
+++ b/duui-video-to-audio/.dockerignore
@@ -0,0 +1,4 @@
+.idea/
+target/
+venv/
+*.mp4
diff --git a/duui-video-to-audio/.gitignore b/duui-video-to-audio/.gitignore
new file mode 100644
index 00000000..f5f085f0
--- /dev/null
+++ b/duui-video-to-audio/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+target/
+venv*/
diff --git a/duui-video-to-audio/README.md b/duui-video-to-audio/README.md
new file mode 100644
index 00000000..df2f23c3
--- /dev/null
+++ b/duui-video-to-audio/README.md
@@ -0,0 +1,13 @@
+#### Video2Audio component for DUUI (Docker Unified UIMA Interface)
+
+Uses the ffmpeg-python package (which requires the `ffmpeg` binary to be installed) to extract the audio track of a video and return it as MP3.
+
+#### Input/Output:
+
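+input: video (base64-encoded file content, stored as the sofa data of the input view)
+
+output: audio (base64-encoded MP3, stored as the sofa data of the output view with MIME type `audio/mp3`)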
+
+#### Parameter:
+
+none
diff --git a/duui-video-to-audio/pom.xml b/duui-video-to-audio/pom.xml
new file mode 100644
index 00000000..f5c4d81e
--- /dev/null
+++ b/duui-video-to-audio/pom.xml
@@ -0,0 +1,155 @@
+
+
+ 4.0.0
+
+ org.texttechnologylab.duui
+ duui_video_to_audio
+ 0.1.0
+
+
+
+ AGPL-3.0-or-later
+ https://www.gnu.org/licenses/agpl.txt
+ repo
+ GNU Affero General Public License v3.0 or later
+
+
+
+
+ Texttechnology Lab
+ https://www.texttechnologylab.org
+
+
+
+ mehler
+ Prof. Dr. Alexander Mehler
+ mehler@em.uni-frankfurt.de
+ https://www.texttechnologylab.org/team/alexander-mehler/
+ Goethe University Frankfurt / Texttechnology Lab
+ https://www.texttechnologylab.org
+
+ head of department
+
+
+
+ aabusale
+ Ali Abusaleh
+ a.abusaleh@em.uni-frankfurt.de
+ https://www.texttechnologylab.org/team/ali-abusaleh/
+ Goethe University Frankfurt / Texttechnology Lab
+ https://www.texttechnologylab.org
+
+ Research assistant
+
+ Europe/Berlin
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 2.22.0
+
+
+ --illegal-access=permit
+ --add-opens java.base/java.util=ALL-UNNAMED
+
+
+
+
+
+
+
+
+
+ 17
+ 17
+ 2.4.0
+
+
+
+
+
+
+ jitpack.io
+ https://jitpack.io
+
+
+
+
+
+
+ org.dkpro.core
+ dkpro-core-asl
+ ${dkpro.core.version}
+ pom
+ import
+
+
+
+
+
+
+
+ com.github.texttechnologylab
+ DockerUnifiedUIMAInterface
+ 1.4
+
+
+
+
+
+
+
+
+ com.github.texttechnologylab
+ UIMATypeSystem
+ 3.0.5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ 5.9.0
+ test
+
+
+
+ org.dkpro.core
+ dkpro-core-api-segmentation-asl
+ test
+
+
+
+ org.dkpro.core
+ dkpro-core-io-xmi-asl
+ test
+
+
+
+ org.dkpro.core
+ dkpro-core-api-resources-asl
+ test
+
+
+
\ No newline at end of file
diff --git a/duui-video-to-audio/python/TypeSystem.xml b/duui-video-to-audio/python/TypeSystem.xml
new file mode 100644
index 00000000..a86c216a
--- /dev/null
+++ b/duui-video-to-audio/python/TypeSystem.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/duui-video-to-audio/python/duui-video-to-audio.py b/duui-video-to-audio/python/duui-video-to-audio.py
new file mode 100644
index 00000000..117ce245
--- /dev/null
+++ b/duui-video-to-audio/python/duui-video-to-audio.py
@@ -0,0 +1,122 @@
+import base64
+import logging
+import os
+import sys
+import tempfile
+import warnings
+from functools import lru_cache
+from io import BytesIO
+from time import time
+from typing import List, Optional, Dict, Union
+
+import ffmpeg
+import uvicorn
+from cassis import load_typesystem
+from fastapi import FastAPI, Response
+from fastapi.encoders import jsonable_encoder
+from pydantic import BaseModel
+from starlette.responses import PlainTextResponse, JSONResponse
+
+
+def convert_to_mp3(video_path):
+ # Define the output audio path
+ audio_path = "tempAudio.mp3"
+
+ # Use ffmpeg to extract audio from the video
+ ffmpeg.input(video_path).output(audio_path).run()
+
+ # Read the audio file and convert it to base64
+ with open(audio_path, "rb") as audio_file:
+ audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
+
+ # Remove the temporary files
+ os.remove(video_path)
+ os.remove(audio_path)
+
+ return audio_base64
+
+
+# Request body of the /v1/process endpoint.
+class DUUIRequest(BaseModel):
+ # Base64-encoded content of the video file to convert
+ video: str
+
+# Response body of the /v1/process endpoint.
+class DUUIResponse(BaseModel):
+ # Base64-encoded MP3 audio extracted from the submitted video
+ audio: str
+ # MIME type of the audio payload (post_process sets it to "audio/mp3")
+ mimetype: str
+
+# FastAPI application object; the route handlers below register on it.
+# The OpenAPI schema is served at /openapi.json, the interactive docs at /api,
+# and the ReDoc page is disabled (redoc_url=None).
+app = FastAPI(
+ openapi_url="/openapi.json",
+ docs_url="/api",
+ redoc_url=None,
+ title= "Video2Audio",
+ description="Video To Audio Component",
+ version= "1.0",
+ terms_of_service="https://www.texttechnologylab.org/legal_notice/",
+ # NOTE(review): contact differs from the developers listed in pom.xml - confirm
+ contact={
+ "name": "Peter Kannewitz",
+ "email": "pk35momo@studserv.uni-leipzig.de",
+ },
+ license_info={
+ "name": "AGPL",
+ "url": "http://www.gnu.org/licenses/agpl-3.0.en.html",
+ },
+)
+
+lua_communication_script_filename = "duui_video_to_audio.lua"
+
+with open(lua_communication_script_filename, 'rb') as f:
+ lua_communication_script = f.read().decode("utf-8")
+
+typesystem_filename = 'TypeSystem.xml'
+with open(typesystem_filename, 'rb') as f:
+ typesystem = load_typesystem(f)
+
+
+@app.get("/v1/typesystem")
+def get_typesystem() -> Response:
+ # TODO rimgve cassis dependency, as only needed for typesystem at the moment?
+ xml = typesystem.to_xml()
+ xml_content = xml.encode("utf-8")
+
+ return Response(
+ content=xml_content,
+ media_type="application/xml"
+ )
+
+# Return Lua communication script
+# The script text (containing the serialize/deserialize functions) is read
+# from disk once at import time and served unchanged as plain text.
+@app.get("/v1/communication_layer", response_class=PlainTextResponse)
+def get_communication_layer() -> str:
+ return lua_communication_script
+
+# Process request from DUUI
+@app.post("/v1/process")
+def post_process(request: DUUIRequest):
+ # Fetch model-related information
+ # this video is base64
+ video = request.video
+
+ try:
+ # convert base64 to video mp4, and saved it locallz
+ with open("tempVideo.mp4", "wb") as f:
+ f.write(base64.b64decode(request.video))
+
+ # 1 - read the file
+ # 2- extract the audio
+ # 3- covert the audio into base64
+ # 4- send back base64 audio
+ # Convert the video to MP3 and get the base64-encoded audio
+ audio_base64 = convert_to_mp3("tempVideo.mp4")
+
+ # Print or send back the base64-encoded audio
+ print(audio_base64)
+
+ return DUUIResponse(
+ audio = audio_base64,
+ mimetype = "audio/mp3"
+ )
+ except Exception as e:
+ print(str(e))
+
+
+# Start the service when this file is executed directly as a script.
+# uvicorn is given the application as an import string ("module:attribute"),
+# binds to all interfaces on port 9714 and runs a single worker.
+if __name__ == "__main__":
+ uvicorn.run("duui-video-to-audio:app", host="0.0.0.0", port=9714, workers=1)
diff --git a/duui-video-to-audio/python/duui_video_to_audio.lua b/duui-video-to-audio/python/duui_video_to_audio.lua
new file mode 100644
index 00000000..fb3551a4
--- /dev/null
+++ b/duui-video-to-audio/python/duui_video_to_audio.lua
@@ -0,0 +1,39 @@
+-- Bind static classes from java
+StandardCharsets = luajava.bindClass("java.nio.charset.StandardCharsets")
+util = luajava.bindClass("org.apache.uima.fit.util.JCasUtil")
+
+-- This "serialize" function is called to transform the CAS object into a stream that is sent to the annotator
+-- Inputs:
+--  - inputCas: The actual CAS object to serialize; its sofa data string is sent as the base64-encoded video
+--  - outputStream: Stream that is sent to the annotator; receives a JSON object with the single field "video"
+--  - params: Additional parameters passed by the caller (not used by this component)
+function serialize(inputCas, outputStream, params)
+    -- Get data from CAS
+    local videoBase64 = inputCas:getSofaDataString()
+
+    -- Fail with a clear message instead of sending an empty JSON object
+    -- that the annotator would reject as a malformed request
+    if videoBase64 == nil then
+        error("duui_video_to_audio: the input CAS contains no sofa data to convert")
+    end
+
+    -- Encode data as JSON object and write to stream.
+    -- The payload is deliberately not printed: it holds the complete video.
+    outputStream:write(json.encode({
+        video = videoBase64
+    }))
+end
+
+-- This "deserialize" function is called on receiving the results from the annotator that have to be transformed into a CAS object
+-- Inputs:
+--  - inputCas: The actual CAS object to deserialize into
+--  - inputStream: Stream that is received from the annotator; expected to hold a JSON object
+--    with the fields "audio" (base64-encoded audio) and "mimetype"
+function deserialize(inputCas, inputStream)
+    -- Get string from stream, assume UTF-8 encoding
+    local inputString = luajava.newInstance("java.lang.String", inputStream:readAllBytes(), StandardCharsets.UTF_8)
+
+    -- Parse JSON data from string into object
+    local results = json.decode(inputString)
+
+    -- Store the audio as sofa data of the CAS. Anything that is not a JSON
+    -- object carrying an "audio" field (e.g. a null body) is skipped.
+    if type(results) == "table" and results["audio"] ~= nil then
+        -- Prefer the MIME type reported by the annotator and fall back to
+        -- the value this script has always used
+        local mimetype = results["mimetype"]
+        if type(mimetype) ~= "string" or mimetype == "" then
+            mimetype = "audio/mp3"
+        end
+
+        inputCas:setSofaDataString(results["audio"], mimetype)
+    end
+end
\ No newline at end of file
diff --git a/duui-video-to-audio/requirements.txt b/duui-video-to-audio/requirements.txt
new file mode 100644
index 00000000..38d2d139
--- /dev/null
+++ b/duui-video-to-audio/requirements.txt
@@ -0,0 +1,9 @@
+scipy==1.13.1
+protobuf==4.25.3
+fastapi==0.110.0
+dkpro-cassis==0.9.1
+uvicorn[standard]==0.27.1
+pydantic-settings==2.0.2
+six==1.16.0
+peft==0.10.0
+ffmpeg-python==0.2.0
\ No newline at end of file
diff --git a/duui-video-to-audio/src/test/java/Video2Audio.java b/duui-video-to-audio/src/test/java/Video2Audio.java
new file mode 100644
index 00000000..5768f961
--- /dev/null
+++ b/duui-video-to-audio/src/test/java/Video2Audio.java
@@ -0,0 +1,109 @@
+import org.apache.commons.compress.compressors.CompressorException;
+import org.apache.commons.io.FileUtils;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIRemoteDriver;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIUIMADriver;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.lua.DUUILuaContext;
+import org.xml.sax.SAXException;
+
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Base64;
+
+import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
+import org.texttechnologylab.DockerUnifiedUIMAInterface.io.writer.TTLabXmiWriter;
+import org.dkpro.core.io.xmi.XmiWriter;
+
+import javax.imageio.ImageIO;
+
+/**
+ * Integration test for the video-to-audio component.
+ *
+ * <p>The test expects the component to listen on http://localhost:9714 and
+ * also calls the remote WhisperX component, so it needs network access.
+ */
+public class Video2Audio {
+
+    ClassLoader classLoader = Video2Audio.class.getClassLoader();
+    // Sample video looked up on the test classpath; null when it is missing
+    URL fVideo = classLoader.getResource("interview2.mp4");
+
+    /**
+     * Decodes a Base64 string and writes the resulting bytes to a file.
+     * Missing parent directories of the target are created.
+     *
+     * @param base64String Base64-encoded audio data
+     * @param outputPath   path of the file to write
+     * @throws IOException              if the directory or the file cannot be written
+     * @throws IllegalArgumentException if the input is not valid Base64
+     */
+    private static void saveBase64ToAudio(String base64String, String outputPath) throws IOException {
+        // Decode the Base64 string into a byte array
+        byte[] decodedBytes = Base64.getDecoder().decode(base64String);
+
+        // Create the target directory first; writing fails if it does not exist
+        Path outputFile = Path.of(outputPath);
+        Path parent = outputFile.getParent();
+        if (parent != null) {
+            Files.createDirectories(parent);
+        }
+
+        // Save the audio to the specified output file
+        Files.write(outputFile, decodedBytes);
+
+        System.out.println("Audio saved as: " + outputPath);
+    }
+
+    /**
+     * Sends the sample video through the video-to-audio component, passes the
+     * extracted audio on to WhisperX, writes the CAS as XMI and stores the
+     * audio as an MP3 file.
+     */
+    @Test
+    @DisplayName("ExtractionTest")
+    public void ExtractionTest() throws Exception {
+
+        // Fail with a readable message instead of a NullPointerException
+        org.junit.jupiter.api.Assertions.assertNotNull(fVideo,
+                "Test resource interview2.mp4 was not found on the classpath");
+
+        DUUIComposer composer = new DUUIComposer().withSkipVerification(true)
+                .withLuaContext(new DUUILuaContext().withJsonLibrary());
+
+        DUUIUIMADriver duuiuimaDriver = new DUUIUIMADriver();
+        DUUIRemoteDriver remoteDriver = new DUUIRemoteDriver();
+        composer.addDriver(remoteDriver, duuiuimaDriver);
+
+        // Resolve the resource through its URI: URL.getPath() keeps
+        // percent-encoding (e.g. %20 for spaces) and then names no real file
+        Path videoPath = Path.of(fVideo.toURI());
+        byte[] bFile = Files.readAllBytes(videoPath);
+        String encodedString = Base64.getEncoder().encodeToString(bFile);
+
+        // probeContentType returns null when the type cannot be determined
+        String pMimeType = Files.probeContentType(videoPath);
+        if (pMimeType == null) {
+            pMimeType = "video/mp4";
+        }
+
+        JCas pCas = JCasFactory.createText("Programm");
+
+        JCas videoView = pCas.createView("video");
+        // Target view of the WhisperX component
+        pCas.createView("transcript");
+        videoView.setSofaDataString(encodedString, pMimeType);
+        videoView.setDocumentLanguage("de");
+
+        JCas audioView = pCas.createView("audio");
+
+        // Video -> audio: reads the "video" view, writes the "audio" view
+        composer.add(
+                new DUUIRemoteDriver.Component("http://localhost:9714")
+                        .withTargetView("audio")
+                        .withSourceView("video")
+                        .withScale(1).build());
+
+        // Audio -> transcript
+        composer.add(new DUUIRemoteDriver.Component("http://whisperx.lehre.texttechnologylab.org")
+                .withScale(1)
+                .withSourceView("audio")
+                .withTargetView("transcript")
+                .build()
+        );
+
+        composer.add(new DUUIUIMADriver.Component(createEngineDescription(XmiWriter.class,
+                XmiWriter.PARAM_TARGET_LOCATION, "/tmp/xmi/",
+                XmiWriter.PARAM_PRETTY_PRINT, true,
+                XmiWriter.PARAM_OVERWRITE, true,
+                XmiWriter.PARAM_COMPRESSION, "GZIP")).build());
+
+        composer.run(pCas);
+
+        // The component must have filled the audio view
+        String audioBase64 = audioView.getSofaDataString();
+        org.junit.jupiter.api.Assertions.assertNotNull(audioBase64,
+                "The audio view contains no data after the pipeline run");
+        org.junit.jupiter.api.Assertions.assertFalse(audioBase64.isEmpty(),
+                "The audio view is empty after the pipeline run");
+
+        saveBase64ToAudio(audioBase64, "/tmp/audio/test_audi.mp3");
+    }
+
+}