texttechnologylab · PeterK-end · Mar 25, 2025
diff --git a/duui-video-to-audio/.dockerignore b/duui-video-to-audio/.dockerignore
@@ -0,0 +1,4 @@
+.idea/
+target/
+venv/
+*.mp4
diff --git a/duui-video-to-audio/.gitignore b/duui-video-to-audio/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+target/
+venv*/
diff --git a/duui-video-to-audio/README.md b/duui-video-to-audio/README.md
@@ -0,0 +1,13 @@
+#### Video2Audio component for DUUI
+
+Uses ffmpeg-python package to convert video to audio.
+
+#### Input/Output:
+
+input: video
+
+output: audio
+
+#### Parameter:
+
+none
diff --git a/duui-video-to-audio/pom.xml b/duui-video-to-audio/pom.xml
@@ -0,0 +1,155 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.texttechnologylab.duui</groupId>
+    <artifactId>duui_video_to_audio</artifactId>
+    <version>0.1.0</version>
+
+    <licenses>
+        <license>
+            <name>AGPL-3.0-or-later</name>
+            <url>https://www.gnu.org/licenses/agpl.txt</url>
+            <distribution>repo</distribution>
+            <comments>GNU Affero General Public License v3.0 or later</comments>
+        </license>
+    </licenses>
+
+    <organization>
+        <name>Texttechnology Lab</name>
+        <url>https://www.texttechnologylab.org</url>
+    </organization>
+    <developers>
+        <developer>
+            <id>mehler</id>
+            <name>Prof. Dr. Alexander Mehler</name>
+            <email>[email protected]</email>
+            <url>https://www.texttechnologylab.org/team/alexander-mehler/</url>
+            <organization>Goethe University Frankfurt / Texttechnology Lab</organization>
+            <organizationUrl>https://www.texttechnologylab.org</organizationUrl>
+            <roles>
+                <role>head of department</role>
+            </roles>
+        </developer>
+        <developer>
+            <id>aabusale</id>
+            <name>Ali Abusaleh</name>
+            <email>[email protected]</email>
+            <url>https://www.texttechnologylab.org/team/ali-abusaleh/</url>
+            <organization>Goethe University Frankfurt / Texttechnology Lab</organization>
+            <organizationUrl>https://www.texttechnologylab.org</organizationUrl>
+            <roles>
+                <role>Research assistant</role>
+            </roles>
+            <timezone>Europe/Berlin</timezone>
+        </developer>
+    </developers>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>2.22.0</version>
+                <configuration>
+                    <argLine>
+                        --illegal-access=permit
+                        --add-opens java.base/java.util=ALL-UNNAMED
+                        <!--                        add-opens for use in JUnit-Tests...-->
+                    </argLine>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <dkpro.core.version>2.4.0</dkpro.core.version>
+        <!--        <ttlab.duui.version>f68ca579ab553074f76d061623dc9b00cf508276</ttlab.duui.version>-->
+        <!--        <ttlab.typesystem.version>033beaa593a99c005400f4021ea8d6fa8957e6c3</ttlab.typesystem.version>-->
+    </properties>
+
+    <repositories>
+        <repository>
+            <id>jitpack.io</id>
+            <url>https://jitpack.io</url>
+        </repository>
+    </repositories>
+
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>org.dkpro.core</groupId>
+                <artifactId>dkpro-core-asl</artifactId>
+                <version>${dkpro.core.version}</version>
+                <type>pom</type>
+                <scope>import</scope>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
+
+    <dependencies>
+        <!--<dependency>
+            <groupId>com.github.texttechnologylab</groupId>
+            <artifactId>DockerUnifiedUIMAInterface</artifactId>
+            <version>${ttlab.duui.version}</version>
+        </dependency>-->
+        <dependency>
+            <groupId>com.github.texttechnologylab</groupId>
+            <artifactId>DockerUnifiedUIMAInterface</artifactId>
+            <version>1.4</version>
+        </dependency>
+        <!--        <dependency>-->
+        <!--            <groupId>com.github.texttechnologylab.textimager-uima</groupId>-->
+        <!--            <artifactId>textimager-uima-util</artifactId>-->
+        <!--            <version>${ttlab.textimager.typesystem.version}</version>-->
+        <!--        </dependency>-->
+
+        <dependency>
+            <groupId>com.github.texttechnologylab</groupId>
+            <artifactId>UIMATypeSystem</artifactId>
+            <version>3.0.5</version>
+        </dependency>
+
+        <!--        <dependency>-->
+        <!--            <groupId>org.texttechnologylab.annotation</groupId>-->
+        <!--            <artifactId>typesystem</artifactId>-->
+        <!--            <version>3.0.1</version>-->
+        <!--        </dependency>-->
+
+        <!--        <dependency>-->
+        <!--            <groupId>org.texttechnologylab</groupId>-->
+        <!--            <artifactId>DockerUnifiedUIMAInterface</artifactId>-->
+        <!--            <version>1.3</version>-->
+        <!--        </dependency>-->
+
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter</artifactId>
+            <version>5.9.0</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.dkpro.core</groupId>
+            <artifactId>dkpro-core-api-segmentation-asl</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.dkpro.core</groupId>
+            <artifactId>dkpro-core-io-xmi-asl</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.dkpro.core</groupId>
+            <artifactId>dkpro-core-api-resources-asl</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/duui-video-to-audio/python/TypeSystem.xml b/duui-video-to-audio/python/TypeSystem.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+
+    </types>
+</typeSystemDescription>
diff --git a/duui-video-to-audio/python/duui-video-to-audio.py b/duui-video-to-audio/python/duui-video-to-audio.py
@@ -0,0 +1,122 @@
+import base64
+import os
+import sys
+import warnings
+from functools import lru_cache
+from io import BytesIO
+from time import time
+from typing import List, Optional, Dict, Union
+
+import ffmpeg
+import uvicorn
+from cassis import load_typesystem
+from fastapi import FastAPI, Response
+from fastapi.encoders import jsonable_encoder
+from pydantic import BaseModel
+from starlette.responses import PlainTextResponse, JSONResponse
+
+
+def convert_to_mp3(video_path):
+    # Define the output audio path
+    audio_path = "tempAudio.mp3"
+
+    # Use ffmpeg to extract audio from the video
+    ffmpeg.input(video_path).output(audio_path).run()
+
+    # Read the audio file and convert it to base64
+    with open(audio_path, "rb") as audio_file:
+        audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
+
+    # Remove the temporary files
+    os.remove(video_path)
+    os.remove(audio_path)
+
+    return audio_base64
+
+
+class DUUIRequest(BaseModel):
+    # Request body of the /v1/process endpoint.
+    # Base64-encoded video data; post_process decodes it with base64.b64decode
+    video: str
+
+class DUUIResponse(BaseModel):
+    # Response body of the /v1/process endpoint.
+    # Base64-encoded content of the MP3 file produced by convert_to_mp3
+    audio: str
+    # MIME type reported for the audio; post_process always sets "audio/mp3"
+    mimetype: str
+
+# FastAPI application of the component. The OpenAPI schema is served at
+# /openapi.json and the interactive docs at /api; ReDoc is disabled.
+app = FastAPI(
+    openapi_url="/openapi.json",
+    docs_url="/api",
+    redoc_url=None,
+    title= "Video2Audio",
+    description="Video To Audio Component",
+    version= "1.0",
+    terms_of_service="https://www.texttechnologylab.org/legal_notice/",
+    contact={
+        "name": "Peter Kannewitz",
+        "email": "[email protected]",
+    },
+    license_info={
+        "name": "AGPL",
+        "url": "http://www.gnu.org/licenses/agpl-3.0.en.html",
+    },
+)
+
+# Both files are opened by relative path, i.e. resolved against the current
+# working directory of the process, and are read once at import time.
+
+# Lua script that is returned verbatim by the /v1/communication_layer endpoint
+lua_communication_script_filename = "duui_video_to_audio.lua"
+
+with open(lua_communication_script_filename, 'rb') as f:
+    lua_communication_script = f.read().decode("utf-8")
+
+# Type system that is returned as XML by the /v1/typesystem endpoint
+typesystem_filename = 'TypeSystem.xml'
+with open(typesystem_filename, 'rb') as f:
+    typesystem = load_typesystem(f)
+
+
+@app.get("/v1/typesystem")
+def get_typesystem() -> Response:
+    # TODO rimgve cassis dependency, as only needed for typesystem at the moment?
+    xml = typesystem.to_xml()
+    xml_content = xml.encode("utf-8")
+
+    return Response(
+        content=xml_content,
+        media_type="application/xml"
+    )
+
+# Return Lua communication script
+# The script text was read from duui_video_to_audio.lua at import time and is
+# sent back unchanged as a plain-text response.
+@app.get("/v1/communication_layer", response_class=PlainTextResponse)
+def get_communication_layer() -> str:
+    return lua_communication_script
+
+# Process request from DUUI
+@app.post("/v1/process")
+def post_process(request: DUUIRequest):
+    # Fetch model-related information
+    # this video is base64
+    video = request.video
+
+    try:
+        # convert base64 to video mp4, and saved it locallz
+        with open("tempVideo.mp4", "wb") as f:
+                f.write(base64.b64decode(request.video))
+
+        # 1 - read the file
+        # 2- extract the audio
+        # 3- covert the audio into base64
+        # 4- send back base64 audio
+        # Convert the video to MP3 and get the base64-encoded audio
+        audio_base64 = convert_to_mp3("tempVideo.mp4")
+
+           # Print or send back the base64-encoded audio
+        print(audio_base64)
+
+        return DUUIResponse(
+             audio = audio_base64,
+             mimetype = "audio/mp3"
+        )
+    except Exception as e:
+        print(str(e))
+
+
+# When executed as a script, serve the app with uvicorn on all interfaces,
+# port 9714, using a single worker process.
+if __name__ == "__main__":
+  uvicorn.run("duui-video-to-audio:app", host="0.0.0.0", port=9714, workers=1)
diff --git a/duui-video-to-audio/python/duui_video_to_audio.lua b/duui-video-to-audio/python/duui_video_to_audio.lua
@@ -0,0 +1,39 @@
+-- Bind static classes from java
+StandardCharsets = luajava.bindClass("java.nio.charset.StandardCharsets")
+util = luajava.bindClass("org.apache.uima.fit.util.JCasUtil")
+
+-- This "serialize" function is called to transform the CAS object into a stream that is sent to the annotator
+-- Inputs:
+--  - inputCas: The actual CAS object to serialize; its sofa data string is sent as the "video" field
+--  - outputStream: Stream that is sent to the annotator; receives the JSON payload
+--  - params: Third argument passed by the caller; not used by this component
+function serialize(inputCas, outputStream, params)
+    -- Get data from CAS
+    local videoBase64 = inputCas:getSofaDataString()
+    if videoBase64 == nil then
+        error("duui-video-to-audio: CAS has no sofa data string to send as video")
+    end
+
+    -- The payload is deliberately not printed: it is the complete base64 video.
+    -- Encode data as JSON object and write to stream
+    outputStream:write(json.encode({
+        video = videoBase64
+    }))
+end
+
+-- This "deserialize" function is called on receiving the results from the annotator that have to be transformed into a CAS object
+-- Inputs:
+--  - inputCas: The actual CAS object to deserialize into
+--  - inputStream: Stream that is received from the annotator; expected to hold a JSON object with an "audio" field
+function deserialize(inputCas, inputStream)
+    -- Get string from stream, assume UTF-8 encoding
+    local inputString = luajava.newInstance("java.lang.String", inputStream:readAllBytes(), StandardCharsets.UTF_8)
+
+    -- Parse JSON data from string into object
+    local results = json.decode(inputString)
+
+    -- A response such as JSON null does not decode to a table; fail with a
+    -- clear message instead of an "attempt to index" error.
+    if type(results) ~= "table" then
+        error("duui-video-to-audio: annotator response is not a JSON object")
+    end
+
+    -- Store the base64 audio as sofa data of the CAS
+    local audio = results["audio"]
+    if audio ~= nil then
+        -- Use the mimetype reported by the annotator, falling back to the previous fixed value
+        local mimetype = results["mimetype"] or "audio/mp3"
+
+        -- NOTE(review): serialize reads the sofa data string of this same CAS; UIMA may
+        -- refuse to set sofa data a second time. Confirm, a separate view may be needed.
+        inputCas:setSofaDataString(audio, mimetype)
+    end
+end
diff --git a/duui-video-to-audio/requirements.txt b/duui-video-to-audio/requirements.txt
@@ -0,0 +1,9 @@
+scipy==1.13.1
+protobuf==4.25.3
+fastapi==0.110.0
+dkpro-cassis==0.9.1
+uvicorn[standard]==0.27.1
+pydantic-settings==2.0.2
+six==1.16.0
+peft==0.10.0
+ffmpeg-python==0.2.0
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    .idea/
+    target/
+    venv/
+    *.mp4