
Commit f5fb390

Updated some of the python examples

1 parent 628930d

File tree

4 files changed: +207, -148 lines


01_basic/00_test_plugin.ipynb

Lines changed: 4 additions & 61 deletions
@@ -2,54 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Extracting auio file from video S:\\Downloads\\1.mp4\n",
-      "Extracting transcript from audio file S:\\Downloads\\1.wav\n",
-      "Using device: cuda:0 to run whisper with model large-v3\n",
-      "Detected language: Italian\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 737030/737030 [3:52:51<00:00, 52.75frames/s] "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "extracted 2162 audio segments\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "ename": "UnicodeEncodeError",
-     "evalue": "'charmap' codec can't encode character '\\u1edb' in position 2725: character maps to <undefined>",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mUnicodeEncodeError\u001b[0m Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[4], line 8\u001b[0m\n\u001b[0;32m 6\u001b[0m file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mS:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mDownloads\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m1.mp4\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 7\u001b[0m audio \u001b[38;5;241m=\u001b[39m av\u001b[38;5;241m.\u001b[39mextract_audio(file)\n\u001b[1;32m----> 8\u001b[0m \u001b[43mav\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranscript_timeline\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlarge-v3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\develop\\github\\SemanticKernelPlayground\\01_basic\\plugins\\AudioVideoPlugin\\AudioVideo.py:85\u001b[0m, in \u001b[0;36mAudioVideo.transcript_timeline\u001b[1;34m(self, audiofile, model)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[38;5;66;03m# Write all raw_transcription_string to a file\u001b[39;00m\n\u001b[0;32m 84\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(text_path, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m---> 85\u001b[0m \u001b[43mfile\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraw_transcription_string\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m transcription_string\n",
-      "File \u001b[1;32m~\\anaconda3\\lib\\encodings\\cp1252.py:19\u001b[0m, in \u001b[0;36mIncrementalEncoder.encode\u001b[1;34m(self, input, final)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mencode\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m, final\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m---> 19\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcodecs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcharmap_encode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43mencoding_table\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n",
-      "\u001b[1;31mUnicodeEncodeError\u001b[0m: 'charmap' codec can't encode character '\\u1edb' in position 2725: character maps to <undefined>"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from plugins.AudioVideoPlugin.AudioVideo import AudioVideo\n",
     "\n",
@@ -63,21 +18,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'transcription_string' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[5], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m ( \u001b[43mtranscription_string\u001b[49m)\n",
-      "\u001b[1;31mNameError\u001b[0m: name 'transcription_string' is not defined"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print ( transcription_string)"
    ]
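
Note on the cleared error above: the removed traceback shows the failure at AudioVideo.py line 85, where open(text_path, 'w') falls back to Windows' default cp1252 codec and cannot encode '\u1edb'. A minimal sketch of the likely fix, reusing the variable names visible in that traceback (the fix itself is not part of this commit):

# Probable fix for the UnicodeEncodeError cleared above (not in this commit):
# force UTF-8 instead of the Windows default cp1252, which cannot represent
# characters such as '\u1edb'. text_path and raw_transcription_string are the
# names taken from the removed traceback.
with open(text_path, "w", encoding="utf-8") as file:
    file.write(raw_transcription_string)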

01_basic/03_chain_skill_call.ipynb

Lines changed: 111 additions & 18 deletions
@@ -68,16 +68,16 @@
     }
    ],
    "source": [
+    "from semantic_kernel import Kernel\n",
     "from semantic_kernel.connectors.ai.open_ai import (\n",
     "    AzureChatCompletion,\n",
     "    AzureTextCompletion,\n",
     ")\n",
     "\n",
     "model = os.getenv(\"AZURE_GPT4_MODEL\", \"gpt4\")\n",
     "endpoint = os.getenv(\"OPENAI_API_BASE\")\n",
-    "kernel = sk.Kernel(log=logger)\n",
-    "kernel.add_chat_service(\n",
-    "    \"chat_completion\",\n",
+    "kernel = Kernel(log=logger)\n",
+    "kernel.add_service(\n",
     "    AzureChatCompletion(\n",
     "        model,\n",
     "        endpoint = endpoint,\n",
@@ -100,21 +100,52 @@
     "# Now you can import the plugin importing skill directly from the function you declared\n",
     "# in the plugin directory. The import_skill does not need the path, it only need an\n",
     "# instance of the skill and the name of the skill\n",
-    "extractaudio_plugin = kernel.import_skill(AudioVideo(), skill_name=\"AudioVideoPlugin\")\n",
+    "extractaudio_plugin = kernel.add_plugin(AudioVideo(), \"AudioVideoPlugin\")\n",
     "\n",
     "plugins_directory = \"./plugins\"\n",
     "\n",
     "# Import the OrchestratorPlugin from the plugins directory.\n",
-    "publishing_plugin = kernel.import_semantic_skill_from_directory(\n",
-    "    plugins_directory, \"PublishingPlugin\"\n",
+    "publishing_plugin = kernel.add_plugin(\n",
+    "    parent_directory=plugins_directory,\n",
+    "    plugin_name=\"PublishingPlugin\"\n",
     ")\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='ExtractAudio', plugin_name='AudioVideoPlugin', description='extract audio in wav format from an mp4 file', parameters=[KernelParameterMetadata(name='videofile', description='Full path to the mp4 file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='output audio file path', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.extract_audio of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "extractaudio_plugin[\"ExtractAudio\"] #This is how you can call the plug"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='ExtractAudio', plugin_name='AudioVideoPlugin', description='extract audio in wav format from an mp4 file', parameters=[KernelParameterMetadata(name='videofile', description='Full path to the mp4 file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='output audio file path', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.extract_audio of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)\n",
+      "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='TranscriptTimeline', plugin_name='AudioVideoPlugin', description='Transcript audio from a wav file to a timeline', parameters=[KernelParameterMetadata(name='audiofile', description='Full path to the wav file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.transcript_timeline of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)\n",
+      "KernelFunctionFromPrompt(metadata=KernelFunctionMetadata(name='VideoTimelineCreator', plugin_name='PublishingPlugin', description='Given a video transcript it can summarize and generate a timeline', parameters=[KernelParameterMetadata(name='input', description='', default_value='', type_='', is_required=True, type_object=None)], is_prompt=True, is_asynchronous=True, return_parameter=KernelParameterMetadata(name='return', description='The completion result', default_value=None, type_='FunctionResult', is_required=True, type_object=None)), prompt_template=KernelPromptTemplate(prompt_template_config=PromptTemplateConfig(name='VideoTimelineCreator', description='Given a video transcript it can summarize and generate a timeline', template='I will give you a transcript of a video. The transcript contains phrases prefixed by the timestamp where the phrase starts. I want you to identify between three and ten main sections of the video. You must never identify more than ten sections.\\nFor each section you will create a brief title prefixed with the start timestamp of the section obtained analyzing all the text belonging to that section.\\n\\nEXAMPLE ANSWER - Maximum of ten sections\\n00:00 - Title of section 1\\n00:33 - Title of section 2\\n01:23 - Title of section 3\\n\\n[DATA]\\n{{$input}}', template_format='semantic-kernel', input_variables=[InputVariable(name='input', description='', default='', is_required=True, json_schema='')], execution_settings={})), prompt_execution_settings={})\n"
+     ]
+    }
+   ],
    "source": [
     "from pprint import pprint\n",
     "# want to print all the keys of extractaudio_plugin that is a dictionary\n",
@@ -126,9 +157,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# you can verify if cuda is available.\n",
    "import torch\n",
@@ -137,18 +179,63 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Extracting auio file from video S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.mp4\n",
+      "S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.wav\n"
+     ]
+    }
+   ],
    "source": [
-    "# Now we can use the skill\n",
-    "# Run the Sqrt function with the context.\n",
-    "result = await kernel.run_async(\n",
+    "import time\n",
+    "\n",
+    "result = await kernel.invoke(\n",
     "    extractaudio_plugin[\"ExtractAudio\"],\n",
+    "    #videofile =\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\250-NlpPrecisionRecallRerank.mp4\"\n",
+    "    videofile =\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\010-CsharpIntro.mp4\"\n",
+    ")\n",
+    "\n",
+    "print (result)\n",
+    "\n",
+    "time.sleep(1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Extracting transcript from audio file S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.wav\n",
+      "Using device: cuda:0 to run whisper with model large-v3\n",
+      "Detected language: English\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " 18%|█▊        | 8598/46811 [08:58<47:41, 13.36frames/s]"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "# now invoke the plugin to transcript\n",
+    "audio_transcript = await kernel.invoke(\n",
     "    extractaudio_plugin[\"TranscriptTimeline\"],\n",
-    "    publishing_plugin[\"VideoTimelineCreator\"],\n",
-    "    input_str=\"\"\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\200-intro-kernel-memory.mp4\"\"\"\n",
-    ")\n"
+    "    audiofile = result\n",
+    ")\n",
+    "\n",
+    "print (audio_transcript)\n"
    ]
   },
   {
@@ -157,7 +244,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(result)"
+    "\n",
+    "# now invoke the plugin to create the video timeline\n",
+    "video_summarization = await kernel.invoke(\n",
+    "    publishing_plugin[\"VideoTimelineCreator\"],\n",
+    "    input = audio_transcript\n",
+    ")\n",
+    "print (video_summarization)"
    ]
   }
  ],
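
Taken together, this notebook's changes track the Semantic Kernel 1.x migration: import_skill and import_semantic_skill_from_directory become kernel.add_plugin, and the single kernel.run_async pipeline is split into one kernel.invoke call per step so intermediate results can be inspected. A condensed sketch of the migrated flow, assuming Azure OpenAI settings come from the environment variables shown in the diff plus an OPENAI_API_KEY, and using a placeholder video path (neither the key variable nor the path is taken from the repo):

# Sketch of the migrated flow (Semantic Kernel >= 1.0). The video path and the
# OPENAI_API_KEY variable are placeholder assumptions, not values from this commit.
import asyncio
import os

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from plugins.AudioVideoPlugin.AudioVideo import AudioVideo


async def main() -> None:
    kernel = Kernel()
    # Prompt functions such as VideoTimelineCreator need a chat service.
    kernel.add_service(
        AzureChatCompletion(
            deployment_name=os.getenv("AZURE_GPT4_MODEL", "gpt4"),
            endpoint=os.getenv("OPENAI_API_BASE"),
            api_key=os.getenv("OPENAI_API_KEY"),
        )
    )
    # add_plugin covers both native classes (the old import_skill) and
    # prompt-template directories (the old import_semantic_skill_from_directory).
    audio_plugin = kernel.add_plugin(AudioVideo(), "AudioVideoPlugin")
    publishing_plugin = kernel.add_plugin(
        parent_directory="./plugins", plugin_name="PublishingPlugin"
    )

    # One invoke per stage instead of one chained run_async call.
    audio = await kernel.invoke(
        audio_plugin["ExtractAudio"], videofile="C:\\videos\\demo.mp4"
    )
    transcript = await kernel.invoke(
        audio_plugin["TranscriptTimeline"], audiofile=str(audio)
    )
    timeline = await kernel.invoke(
        publishing_plugin["VideoTimelineCreator"], input=str(transcript)
    )
    print(timeline)


asyncio.run(main())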

01_basic/plugins/AudioVideoPlugin/AudioVideo.py

Lines changed: 11 additions & 8 deletions
@@ -1,17 +1,19 @@
 import subprocess
-from semantic_kernel.skill_definition import sk_function
+from typing import Annotated
+from semantic_kernel.functions import kernel_function
 import os
 import whisper
 import torch
 
 class AudioVideo:
 
-    @sk_function(
+    @kernel_function(
         description="extract audio in wav format from an mp4 file",
         name="ExtractAudio",
-        input_description="Full path to the mp4 file",
     )
-    def extract_audio(self, videofile: str) -> str:
+    def extract_audio(
+        self,
+        videofile: Annotated[str, "Full path to the mp4 file"]) -> Annotated[str, "output audio file path"]:
         """
         Extract audio from a video file and return the full path to the extracted file.
 
@@ -33,12 +35,13 @@ def extract_audio(self, videofile: str) -> str:
         # now ffmpeg has created the audio file, return the path to it
         return audio_path
 
-    @sk_function(
+    @kernel_function(
         description="Transcript audio from a wav file to a timeline",
         name="TranscriptTimeline",
-        input_description="Full path to the wav file",
     )
-    def transcript_timeline(self, audiofile: str) -> str:
+    def transcript_timeline(
+        self,
+        audiofile: Annotated[str, "Full path to the wav file"]) -> str:
 
         """
         Extract a transcript from an audio file and return a transcript file that
@@ -47,7 +50,7 @@ def transcript_timeline(self, audiofile: str) -> str:
         :param audiofile: Full path to the wav file
         :return: transcripted text with start and end time
         """
-        model: str = "medium.en" #"large-v3" #"medium.en"
+        model: str = "medium.en" ##"tiny.en" #"large-v3" #"medium.en"
         print(f"Extracting transcript from audio file {audiofile}")
         # model = whisper.load_model("medium.en")
 
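
The decorator change above is the same migration seen from the plugin side: @sk_function's input_description moves into typing.Annotated on the parameter itself, which is what populates the KernelParameterMetadata descriptions printed in the notebook earlier. A compact sketch of the new style; the ffmpeg command line is an assumption, since the actual extraction code sits outside this hunk:

# New-style Semantic Kernel plugin: @kernel_function plus Annotated replaces
# @sk_function with input_description. The ffmpeg invocation below is an
# assumed implementation detail; the repo's real command is not in this diff.
import subprocess
from typing import Annotated

from semantic_kernel.functions import kernel_function


class AudioVideoSketch:
    @kernel_function(
        description="extract audio in wav format from an mp4 file",
        name="ExtractAudio",
    )
    def extract_audio(
        self,
        videofile: Annotated[str, "Full path to the mp4 file"],
    ) -> Annotated[str, "output audio file path"]:
        audio_path = videofile.rsplit(".", 1)[0] + ".wav"
        # -vn drops the video stream; pcm_s16le writes a plain 16-bit wav.
        subprocess.run(
            ["ffmpeg", "-y", "-i", videofile, "-vn",
             "-acodec", "pcm_s16le", audio_path],
            check=True,
        )
        return audio_path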
