diff --git a/nbs/Stable_Audio_API.ipynb b/nbs/Stable_Audio_API.ipynb index 2be251ea..08ef25f4 100644 --- a/nbs/Stable_Audio_API.ipynb +++ b/nbs/Stable_Audio_API.ipynb @@ -46,7 +46,7 @@ } ], "source": [ - "#@title Text to Audio\n", + "#@title Stable Audio 2: Text to Audio\n", "\n", "prompt = \"Genre: UK Bass | Instruments: 707 Drum Machine, Strings, 808 bass stabs, Beautiful Synths\" #@param {type:\"string\"}\n", "duration = 190 #@param {type:\"number\"}\n", @@ -54,7 +54,71 @@ "steps = 50 #@param {type:\"number\"}\n", "cfg_scale = 7.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", + "response = requests.post(\n", + " \"https://api.stability.ai/v2beta/audio/stable-audio-2/text-to-audio\",\n", + " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", + " files={\"image\": None},\n", + " data={\n", + " \"prompt\" : prompt,\n", + " \"duration\": duration,\n", + " \"seed\": seed,\n", + " \"steps\": steps,\n", + " \"cfg_scale\" : cfg_scale,\n", + " \"output_format\": output_format,\n", + " \"model\": \"stable-audio-2\"\n", + " }\n", + ")\n", + "if not response.ok:\n", + " raise Exception(f\"HTTP {response.status_code}: {response.text}\")\n", + "\n", + "# Save and show the result\n", + "filename = f\"txt2audio.mp3\"\n", + "with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "print(f\"Saved {filename}\")\n", "\n", + "IPython.display.display(IPython.display.Audio(filename))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved txt2audio.mp3\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@title Stable Audio 2.5: Text to Audio\n", + "\n", + "prompt = \"Genre: UK Bass | Instruments: 707 Drum Machine, Strings, 808 bass stabs, 
Beautiful Synths\" #@param {type:\"string\"}\n", + "duration = 190 #@param {type:\"number\"}\n", + "seed = 0 #@param {type:\"number\"}\n", + "steps = 8 #@param {type:\"number\"}\n", + "cfg_scale = 1.0 #@param {type:\"number\"}\n", + "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", "response = requests.post(\n", " \"https://api.stability.ai/v2beta/audio/stable-audio-2/text-to-audio\",\n", " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", @@ -66,6 +130,7 @@ " \"steps\": steps,\n", " \"cfg_scale\" : cfg_scale,\n", " \"output_format\": output_format,\n", + " \"model\": \"stable-audio-2.5\"\n", " }\n", ")\n", "if not response.ok:\n", @@ -138,7 +203,7 @@ } ], "source": [ - "#@title Audio to Audio\n", + "#@title Stable Audio 2: Audio to Audio\n", "\n", "#@markdown - Drag and drop a .wav or .mp3 to file folder on left\n", "#@markdown - Right click on it and choose Copy path\n", @@ -152,7 +217,7 @@ "steps = 50 #@param {type:\"number\"}\n", "cfg_scale = 7.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", - "strength = 1.0 #@param {type:\"number\"}\n", + "strength = 0.5 #@param {type:\"number\"}\n", "\n", "response = requests.post(\n", " \"https://api.stability.ai/v2beta/audio/stable-audio-2/audio-to-audio\",\n", @@ -166,6 +231,112 @@ " \"cfg_scale\" : cfg_scale,\n", " \"output_format\": output_format,\n", " \"strength\": strength,\n", + " \"model\": \"stable-audio-2\"\n", + " }\n", + ")\n", + "if not response.ok:\n", + " raise Exception(f\"HTTP {response.status_code}: {response.text}\")\n", + "\n", + "# Save and show the result\n", + "filename = f\"audio2audio.mp3\"\n", + "with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "print(f\"Saved {filename}\")\n", + "\n", + "print(\"\\nOriginal audio:\")\n", + "IPython.display.display(IPython.display.Audio(audio))\n", + "\n", + "print(\"\\nGeneration result:\")\n", + 
"IPython.display.display(IPython.display.Audio(filename))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved audio2audio.mp3\n", + "\n", + "Original audio:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Generation result:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@title Stable Audio 2.5: Audio to Audio\n", + "\n", + "#@markdown - Drag and drop a .wav or .mp3 to file folder on left\n", + "#@markdown - Right click on it and choose Copy path\n", + "#@markdown - Paste that path into audio field below\n", + "#@markdown

\n", + "\n", + "prompt = \"Lofi hip hop beat, chillhop\" #@param {type:\"string\"}\n", + "audio = \"/content/piano.mp3\" #@param {type:\"string\"}\n", + "duration = 45 #@param {type:\"number\"}\n", + "seed = 0 #@param {type:\"number\"}\n", + "steps = 8 #@param {type:\"number\"}\n", + "cfg_scale = 1.0 #@param {type:\"number\"}\n", + "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", + "strength = 0.5 #@param {type:\"number\"}\n", + "\n", + "response = requests.post(\n", + " \"https://api.stability.ai/v2beta/audio/stable-audio-2/audio-to-audio\",\n", + " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", + " files={\"audio\": open(audio, \"rb\")},\n", + " data={\n", + " \"prompt\" : prompt,\n", + " \"duration\": duration,\n", + " \"seed\": seed,\n", + " \"steps\": steps,\n", + " \"cfg_scale\" : cfg_scale,\n", + " \"output_format\": output_format,\n", + " \"strength\": strength,\n", + " \"model\": \"stable-audio-2.5\"\n", " }\n", ")\n", "if not response.ok:\n", @@ -190,8 +361,107 @@ "metadata": { "id": "TDWW5DQbDnNo" }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved inpaint.mp3\n", + "\n", + "Original audio:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Generation result:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@title Inpaint\n", + "\n", + "#@markdown - Drag and drop a .wav or .mp3 to file folder on left\n", + "#@markdown - Right click on it and choose Copy path\n", + "#@markdown - Paste that path into audio field below\n", + "#@markdown

\n", + "\n", + "prompt = \"Lofi hip hop beat, chillhop\" #@param {type:\"string\"}\n", + "audio = \"/content/piano.mp3\" #@param {type:\"string\"}\n", + "duration = 45 #@param {type:\"number\"}\n", + "seed = 0 #@param {type:\"number\"}\n", + "steps = 8 #@param {type:\"number\"}\n", + "cfg_scale = 1.0 #@param {type:\"number\"}\n", + "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", + "mask_start = 15.0 #@param {type:\"number\"}\n", + "mask_end = 40.0 #@param {type:\"number\"}\n", + "\n", + "response = requests.post(\n", + " \"https://api.stability.ai/v2beta/audio/stable-audio-2/inpaint\",\n", + " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", + " files={\"audio\": open(audio, \"rb\")},\n", + " data={\n", + " \"prompt\" : prompt,\n", + " \"duration\": duration,\n", + " \"seed\": seed,\n", + " \"steps\": steps,\n", + " \"cfg_scale\" : cfg_scale,\n", + " \"output_format\": output_format,\n", + " \"mask_start\": mask_start,\n", + " \"mask_end\": mask_end\n", + " }\n", + ")\n", + "if not response.ok:\n", + " raise Exception(f\"HTTP {response.status_code}: {response.text}\")\n", + "\n", + "# Save and show the result\n", + "filename = f\"inpaint.mp3\"\n", + "with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "print(f\"Saved {filename}\")\n", + "\n", + "print(\"\\nOriginal audio:\")\n", + "IPython.display.display(IPython.display.Audio(audio))\n", + "\n", + "print(\"\\nGeneration result:\")\n", + "IPython.display.display(IPython.display.Audio(filename))" + ] } ], "metadata": { @@ -199,7 +469,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "venv", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -213,7 +483,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.13.3" } }, "nbformat": 4, diff --git a/nbs/Stable_Image_API_Public.ipynb b/nbs/Stable_Image_API_Public.ipynb index a604243b..f247064d 
100644 --- a/nbs/Stable_Image_API_Public.ipynb +++ b/nbs/Stable_Image_API_Public.ipynb @@ -1018,7 +1018,9 @@ "- Creative Upscaler: This service will transform a low quality, low resolution image into a stunning work of art with intricate details at 9mp resolution - regardless of the input resolution. Provide an input image of poor quality and add a `prompt` that describes the desired output. High `creativity` (up to 0.35) will yield dramatic changes to the image.\n", "This service will use 25 credits.\n", "\n", - "- Conservative Upscaler: Upscale and image to 4K resolution while minimizing alterations to the image. This service will use 3 credits.\n", + "- Conservative Upscaler: Upscale low-resolution images to 4K without reinterpreting the image. This service will use 25 credits.\n", + "\n", + "- Fast Upscaler: Simple, low-cost upscaler to increase image resolution by 4x, up to 4 megapixels. This service will use 1 credit.\n", "\n", "See https://platform.stability.ai/docs/api-reference#tag/Upscale\n", "\n",