OpenAI - Add audio functionality to Chat action (#14367)

michelle0927 · malexanderlim · commit 019f1cb24f7a · 2024-10-29T09:47:33.000-07:00
* accept audio input

* bump component and package versions

* add configuration error when audio is used without the `gpt-4o-audio-preview` model
diff --git a/components/openai/actions/chat/chat.mjs b/components/openai/actions/chat/chat.mjs
@@ -1,11 +1,12 @@
 import openai from "../../openai.app.mjs";
 import common from "../common/common.mjs";
 import constants from "../../common/constants.mjs";
+import { ConfigurationError } from "@pipedream/platform";
 
 export default {
   ...common,
   name: "Chat",
-  version: "0.2.0",
+  version: "0.2.1",
   key: "openai-chat",
   description: "The Chat API, using the `gpt-3.5-turbo` or `gpt-4` model. [See the documentation](https://platform.openai.com/docs/api-reference/chat)",
   type: "action",
@@ -38,7 +39,13 @@ export default {
     images: {
       label: "Images",
       type: "string[]",
-      description: "Provide one or more images to [OpenAI's vision model](https://platform.openai.com/docs/guides/vision). Accepts URLs or base64 encoded strings. Compatible with the `gpt4-vision-preview model`",
+      description: "Provide one or more images to [OpenAI's vision model](https://platform.openai.com/docs/guides/vision). Accepts URLs or base64 encoded strings. Compatible with the `gpt4-vision-preview` model",
+      optional: true,
+    },
+    audio: {
+      type: "string",
+      label: "Audio",
+      description: "Provide the file path to an audio file in the `/tmp` directory. For use with the `gpt-4o-audio-preview` model. Currently supports `wav` and `mp3` files.",
       optional: true,
     },
     responseFormat: {
@@ -65,6 +72,10 @@ export default {
     };
   },
   async run({ $ }) {
+    if (this.audio && !this.modelId.includes("gpt-4o-audio-preview")) {
+      throw new ConfigurationError("Use of audio files requires using the `gpt-4o-audio-preview` model.");
+    }
+
     const args = this._getChatArgs();
 
     const response = await this.openai.createChatCompletion({
diff --git a/components/openai/actions/classify-items-into-categories/classify-items-into-categories.mjs b/components/openai/actions/classify-items-into-categories/classify-items-into-categories.mjs
@@ -3,7 +3,7 @@ import common from "../common/common-helper.mjs";
 export default {
   ...common,
   name: "Classify Items into Categories",
-  version: "0.1.0",
+  version: "0.1.1",
   key: "openai-classify-items-into-categories",
   description: "Classify items into specific categories using the Chat API. [See the documentation](https://platform.openai.com/docs/api-reference/chat)",
   type: "action",
diff --git a/components/openai/actions/common/common.mjs b/components/openai/actions/common/common.mjs
@@ -1,6 +1,7 @@
 import { ConfigurationError } from "@pipedream/platform";
 import constants from "../../common/constants.mjs";
 import { parse } from "../../common/helpers.mjs";
+import fs from "fs";
 
 const CHAT_DOCS_MESSAGE_FORMAT_URL = "https://platform.openai.com/docs/guides/chat/introduction";
 
@@ -92,6 +93,20 @@ export default {
         }
       }
 
+      if (this.audio) {
+        const fileContent = fs.readFileSync(this.audio.includes("tmp/")
+          ? this.audio
+          : `/tmp/${this.audio}`).toString("base64");
+        const extension = this.audio.match(/\.(\w+)$/)?.[1];
+        content.push({
+          type: "input_audio",
+          input_audio: {
+            data: fileContent,
+            format: extension,
+          },
+        });
+      }
+
       content.push({
         "type": "text",
         "text": this.userMessage,
diff --git a/components/openai/actions/create-embeddings/create-embeddings.mjs b/components/openai/actions/create-embeddings/create-embeddings.mjs
@@ -4,7 +4,7 @@ import common from "../common/common.mjs";
 
 export default {
   name: "Create Embeddings",
-  version: "0.0.12",
+  version: "0.0.13",
   key: "openai-create-embeddings",
   description: "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. [See the documentation](https://platform.openai.com/docs/api-reference/embeddings)",
   type: "action",
diff --git a/components/openai/actions/create-transcription/create-transcription.mjs b/components/openai/actions/create-transcription/create-transcription.mjs
@@ -24,7 +24,7 @@ const pipelineAsync = promisify(stream.pipeline);
 
 export default {
   name: "Create Transcription (Whisper)",
-  version: "0.1.12",
+  version: "0.1.13",
   key: "openai-create-transcription",
   description: "Transcribes audio into the input language. [See the documentation](https://platform.openai.com/docs/api-reference/audio/create).",
   type: "action",
diff --git a/components/openai/actions/send-prompt/send-prompt.mjs b/components/openai/actions/send-prompt/send-prompt.mjs
@@ -4,7 +4,7 @@ import common from "../common/common.mjs";
 export default {
   ...common,
   name: "Create Completion (Send Prompt)",
-  version: "0.1.11",
+  version: "0.1.12",
   key: "openai-send-prompt",
   description: "OpenAI recommends using the **Chat** action for the latest `gpt-3.5-turbo` API, since it's faster and 10x cheaper. This action creates a completion for the provided prompt and parameters using the older `/completions` API. [See the documentation](https://beta.openai.com/docs/api-reference/completions/create)",
   type: "action",
diff --git a/components/openai/actions/summarize/summarize.mjs b/components/openai/actions/summarize/summarize.mjs
@@ -4,7 +4,7 @@ import constants from "../../common/constants.mjs";
 export default {
   ...common,
   name: "Summarize Text",
-  version: "0.1.0",
+  version: "0.1.1",
   key: "openai-summarize",
   description: "Summarizes text using the Chat API. [See the documentation](https://platform.openai.com/docs/api-reference/chat)",
   type: "action",
diff --git a/components/openai/actions/translate-text/translate-text.mjs b/components/openai/actions/translate-text/translate-text.mjs
@@ -9,7 +9,7 @@ const langOptions = lang.LANGUAGES.map((l) => ({
 export default {
   ...common,
   name: "Translate Text (Whisper)",
-  version: "0.1.0",
+  version: "0.1.1",
   key: "openai-translate-text",
   description: "Translate text from one language to another using the Chat API. [See the documentation](https://platform.openai.com/docs/api-reference/chat)",
   type: "action",
diff --git a/components/openai/package.json b/components/openai/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/openai",
-  "version": "0.6.0",
+  "version": "0.6.1",
   "description": "Pipedream OpenAI Components",
   "main": "openai.app.mjs",
   "keywords": [

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,6 +1,6 @@` |
| `1` | `1` | `{` |
| `2` | `2` | `"name": "@pipedream/openai",` |
| `3` |  | `- "version": "0.6.0",` |
|  | `3` | `+ "version": "0.6.1",` |
| `4` | `4` | `"description": "Pipedream OpenAI Components",` |
| `5` | `5` | `"main": "openai.app.mjs",` |
| `6` | `6` | `"keywords": [` |