grycap
diff --git a/‎crates/kokoro-tts/README.md‎
Lines changed: 29 additions & 0 deletions b/‎crates/kokoro-tts/README.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎crates/kokoro-tts/fdl.yml‎
Lines changed: 15 additions & 0 deletions b/‎crates/kokoro-tts/fdl.yml‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎crates/kokoro-tts/icon.png‎
1.32 MB b/‎crates/kokoro-tts/icon.png‎
1.32 MB
diff --git a/‎crates/kokoro-tts/input.json‎
Lines changed: 10 additions & 0 deletions b/‎crates/kokoro-tts/input.json‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎crates/kokoro-tts/ro-crate-metadata.json‎
Lines changed: 276 additions & 0 deletions b/‎crates/kokoro-tts/ro-crate-metadata.json‎
Lines changed: 276 additions & 0 deletions
diff --git a/‎crates/kokoro-tts/script.sh‎
Lines changed: 8 additions & 0 deletions b/‎crates/kokoro-tts/script.sh‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎crates/vosk-stt/README.md‎
Lines changed: 8 additions & 0 deletions b/‎crates/vosk-stt/README.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎crates/vosk-stt/fdl.yml‎
Lines changed: 15 additions & 0 deletions b/‎crates/vosk-stt/fdl.yml‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎crates/vosk-stt/icon.png‎
1.49 MB b/‎crates/vosk-stt/icon.png‎
1.49 MB
diff --git a/‎crates/vosk-stt/input_en.wav‎
184 KB b/‎crates/vosk-stt/input_en.wav‎
184 KB
@@ -0,0 +1,29 @@
+# Kokoro TTS Service for OSCAR
+
+This service contains the configuration necessary to implement a Text-to-Speech (TTS) service using the [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) model. The service is optimized to run asynchronously on the CPU only, allowing its deployment on infrastructures without a GPU, including AMD64 and ARM64 architectures.
+
+Kokoro is an open-source TTS model with 82 million parameters. Despite its lightweight architecture, it offers comparable quality to larger models, while being significantly faster and more cost-effective. Thanks to its Apache-licensed weights, it can be deployed in any environment.
+
+To run the service, provide a .json file that contains both the message to be processed and the execution configuration parameters. This makes the service flexible for any environment. The input file must have the following structure:
+
+```json
+{
+  "model": "af_bella",
+  "language": "en-gb",
+  "message": "This is an audio sample generated using the kokoro-tts service.",
+  "config": {
+    "speed": 1.0,
+    "volume": 3.1,
+    "output": "wav"
+  }
+}
+```
+
+Description of the configuration parameters:
+
+* model: Voice identifier (all available models in [voices](https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX/tree/main/voices)).
+* language: Language of the text to be processed ([lang_code](https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md)).
+* speed: Speech speed (0.5 to 2).
+* volume: Output audio volume level.
+* output: Output audio file format (for example: "mp3", "wav", "flac").
+
@@ -0,0 +1,15 @@
+functions:
+  oscar:
+    - oscar-cluster:
+        name: kokoro-tts
+        memory: 3Gi
+        cpu: '2.0'  # quoted so the value stays a string
+        image: ghcr.io/grycap/kokoro-tts:latest
+        script: script.sh
+        log_level: CRITICAL
+        input:
+          - storage_provider: minio.default
+            path: kokoro-tts/input  # JSON request files are read from here
+        output:
+          - storage_provider: minio.default
+            path: kokoro-tts/output  # generated audio is written here
@@ -0,0 +1,10 @@
+{
+  "model": "af_bella",
+  "language": "en-gb",
+  "message": "This is an audio sample generated using the kokoro-tts service.",
+  "config": {
+    "speed": 1.0,
+    "volume": 3.1,
+    "output": "wav"
+  }
+}
@@ -0,0 +1,276 @@
+{
+  "@context": [
+    "https://w3id.org/ro/crate/1.1/context"
+  ],
+  "@graph": [
+    {
+      "@type": "CreativeWork",
+      "@id": "ro-crate-metadata.json",
+      "conformsTo": {
+        "@id": "https://w3id.org/ro/crate/1.1"
+      },
+      "about": {
+        "@id": "./"
+      }
+    },
+    {
+      "@id": "./",
+      "@type": [
+        "Dataset",
+        "Service",
+        "SoftwareApplication"
+      ],
+      "datePublished": "2026-02-13",
+      "URL": "https://github.com/grycap/oscar-hub/tree/main/crates/kokoro-tts",
+      "name": "Kokoro TTS Service",
+      "description": "The Kokoro TTS service is a speech synthesis solution that transforms text into multilingual audio optimized for CPU architectures (AMD/ARM). ",
+      "license": {
+        "@id": "https://www.apache.org/licenses/LICENSE-2.0"
+      },
+      "applicationCategory": "OSCAR Service",
+      "memoryRequirements": "3 GiB",
+      "processorRequirements": [
+        "2 vCPU",
+        "0 GPU"
+      ],
+      "serviceType":"asynchronous",
+      "isBasedOn": [
+        {
+          "@id": "https://huggingface.co/hexgrad/Kokoro-82M"
+        }
+      ],
+      "author": {
+        "@id": "https://orcid.org/0000-0002-7335-3849"
+      },
+      "subjectOf": [
+        {
+          "@id": "#acceptance-test-async"
+        }
+      ],
+      "hasPart": [
+        {
+          "@id": "fdl.yml"
+        },
+        {
+          "@id": "script.sh"
+        },
+        {
+          "@id": "icon.png"
+        },
+        {
+          "@id": "input.json"
+        },
+        {
+          "@id": "#expected-audio"
+        }
+      ]
+    },
+    {
+      "@id": "fdl.yml",
+      "@type": [
+        "File",
+        "SoftwareSourceCode"
+      ],
+      "name": "OSCAR Service Definition",
+      "url": "https://raw.githubusercontent.com/grycap/oscar-hub/refs/heads/main/crates/kokoro-tts/fdl.yml",
+      "encodingFormat": "text/yaml"
+    },
+    {
+      "@id": "script.sh",
+      "@type": [
+        "File",
+        "SoftwareSourceCode"
+      ],
+      "name": "OSCAR Service Script",
+      "url": "https://raw.githubusercontent.com/grycap/oscar-hub/refs/heads/main/crates/kokoro-tts/script.sh",
+      "encodingFormat": "text/x-shellscript"
+    },
+    {
+      "@id": "icon.png",
+      "@type": [
+        "File",
+        "ImageObject"
+      ],
+      "name": "OSCAR Service Icon",
+      "url": "https://raw.githubusercontent.com/grycap/oscar-hub/refs/heads/main/crates/kokoro-tts/icon.png",
+      "encodingFormat": "image/png"
+    },
+    {
+      "@id": "input.json",
+      "@type": [
+        "File"
+      ],
+      "name": "Sample configuration file",
+      "description": "Replace with a representative .json file containing the message to be processed and its configuration to run the kokoro-tts model.",
+      "url": "https://raw.githubusercontent.com/grycap/oscar-hub/refs/heads/main/crates/kokoro-tts/input.json",
+      "encodingFormat": "text/json"
+    },
+    {
+      "@id": "#acceptance-test-async",
+      "@type": "HowTo",
+      "name": "Asynchronous Kokoro-tts acceptance test",
+      "description": "Upload the sample file, wait for the audio file to be created, and then download the file in the specified format.",
+      "tool": [
+        {
+          "@id": "#tool-oscar-cli"
+        }
+      ],
+      "supply": [
+        {
+          "@id": "#supply-sample-json"
+        }
+      ],
+      "step": [
+        {
+          "@id": "#step-async-put"
+        },
+        {
+          "@id": "#step-async-wait"
+        },
+        {
+          "@id": "#step-async-get"
+        }
+      ]
+    },
+    {
+      "@id": "#tool-oscar-cli",
+      "@type": "HowToTool",
+      "name": "OSCAR CLI",
+      "item": {
+        "@id": "#oscar-cli"
+      }
+    },
+    {
+      "@id": "#supply-sample-json",
+      "@type": "HowToSupply",
+      "name": "Sample json file",
+      "item": {
+        "@id": "input.json"
+      }
+    },
+    {
+      "@id": "#expected-audio",
+      "@type": [
+        "File"
+      ],
+      "name": "Expected audio output",
+      "description": "Audio file produced by the service that contains the message with the specified characteristics.",
+      "encodingFormat": "audio/wav"
+    },
+    {
+      "@id": "#step-async-put",
+      "@type": "HowToStep",
+      "position": 1,
+      "text": "Upload the sample configuration file to the service's storage.",
+      "potentialAction": {
+        "@id": "#action-async-put"
+      }
+    },
+    {
+      "@id": "#step-async-wait",
+      "@type": "HowToStep",
+      "position": 2,
+      "text": "Wait for the service to generate the audio output according to the specified configuration.",
+      "timeRequired": "PT60S"
+    },
+    {
+      "@id": "#step-async-get",
+      "@type": "HowToStep",
+      "position": 3,
+      "text": "Download the latest output file and confirm that it is in the expected format.",
+      "potentialAction": {
+        "@id": "#action-async-get"
+      }
+    },
+    {
+      "@id": "#action-async-put",
+      "@type": "TransferAction",
+      "name": "service put-file",
+      "object": {
+        "@id": "input.json"
+      },
+      "target": {
+        "@id": "#entry-async-put"
+      },
+      "additionalProperty": [
+        {
+          "@id": "#command-template-async-put"
+        }
+      ]
+    },
+    {
+      "@id": "#entry-async-put",
+      "@type": "EntryPoint",
+      "actionApplication": {
+        "@id": "#oscar-cli"
+      }
+    },
+    {
+      "@id": "#command-template-async-put",
+      "@type": "PropertyValue",
+      "propertyID": "commandTemplate",
+      "value": "oscar-cli service put-file kokoro-tts {source}"
+    },
+    {
+      "@id": "#action-async-get",
+      "@type": "TransferAction",
+      "name": "service get-file",
+      "target": {
+        "@id": "#entry-async-get"
+      },
+      "result": {
+        "@id": "#expected-audio"
+      },
+      "additionalProperty": [
+        {
+          "@id": "#command-template-async-get"
+        }
+      ]
+    },
+    {
+      "@id": "#entry-async-get",
+      "@type": "EntryPoint",
+      "actionApplication": {
+        "@id": "#oscar-cli"
+      }
+    },
+    {
+      "@id": "#command-template-async-get",
+      "@type": "PropertyValue",
+      "propertyID": "commandTemplate",
+      "value": "oscar-cli service get-file kokoro-tts --download-latest-into {destination}"
+    },
+    {
+      "@id": "#oscar-cli",
+      "@type": "SoftwareApplication",
+      "name": "OSCAR CLI",
+      "url": "https://github.com/grycap/oscar-cli"
+    },
+    {
+      "@id": "https://orcid.org/0000-0002-7335-3849",
+      "@type": "Person",
+      "affiliation": {
+        "@id": "UPV"
+      },
+      "name": "Vicente Rodriguez"
+    },
+    {
+      "@id": "https://www.apache.org/licenses/LICENSE-2.0",
+      "@type": "CreativeWork",
+      "name": "Apache License 2.0",
+      "identifier": "SPDX:Apache-2.0"
+    },
+    {
+      "@id": "https://huggingface.co/hexgrad/Kokoro-82M",
+      "@type": "SoftwareApplication",
+      "name": "Kokoro-82M",
+      "description": "Kokoro is an open-weight TTS model with 82 million parameters."
+    },
+    {
+      "@id": "UPV",
+      "@type": "Organization",
+      "name": "Universitat Politècnica de València",
+      "url": "https://www.upv.es"
+    }
+  ]
+}
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Passes the input file and an output path prefix to /app/kokoro_factory.py.
+echo "--- Initiating Kokoro TTS Processing ---"
+set -e  # from this point on, stop at the first failing command
+# INPUT_FILE_PATH and TMP_OUTPUT_DIR are not set here; they must come from the environment.
+request_stem="$(basename "$INPUT_FILE_PATH" .json)"
+target_prefix="$TMP_OUTPUT_DIR/${request_stem}"  # output dir + input name without .json
+python3 /app/kokoro_factory.py "$INPUT_FILE_PATH" "$target_prefix"
@@ -0,0 +1,8 @@
+# Vosk STT Service for OSCAR 
+
+[Vosk](https://alphacephei.com/vosk/) is a fully offline, open-source speech-to-text (STT) toolkit. Its architecture is notable for its efficiency, using lightweight models that enable near-instantaneous responses even on modest hardware, and for its exceptional robustness, capable of accurately processing background noise and technical terms. It also offers versatile multi-language support.
+
+The Vosk STT service processes an audio file and returns the result as a text file. It is currently built on [models](https://alphacephei.com/vosk/models) for English and Spanish audio. It cannot detect the speaker's language automatically, so the language must be indicated through the input file name: **audio.wav** for Spanish audio and **audio_en.wav** for English audio. It supports any input audio file format. The output is a .txt file containing the text generated by the speech recognition process.
+
+
+
@@ -0,0 +1,15 @@
+functions:
+  oscar:
+    - oscar-cluster:
+        name: vosk-stt
+        script: script.sh
+        image: ghcr.io/grycap/vosk-stt:v1.0
+        cpu: '2.0'  # quoted so the value stays a string
+        memory: 3Gi
+        log_level: CRITICAL
+        input:
+          - path: vosk-stt/input
+            storage_provider: minio.default
+        output:
+          - path: vosk-stt/output
+            storage_provider: minio.default