|
68 | 68 | } |
69 | 69 | ], |
70 | 70 | "source": [ |
| 71 | + "from semantic_kernel import Kernel\n", |
71 | 72 | "from semantic_kernel.connectors.ai.open_ai import (\n", |
72 | 73 | " AzureChatCompletion,\n", |
73 | 74 | " AzureTextCompletion,\n", |
74 | 75 | ")\n", |
75 | 76 | "\n", |
76 | 77 | "model = os.getenv(\"AZURE_GPT4_MODEL\", \"gpt4\")\n", |
77 | 78 | "endpoint = os.getenv(\"OPENAI_API_BASE\")\n", |
78 | | - "kernel = sk.Kernel(log=logger)\n", |
79 | | - "kernel.add_chat_service(\n", |
80 | | - " \"chat_completion\",\n", |
| 79 | + "kernel = Kernel(log=logger)\n", |
| 80 | + "kernel.add_service(\n", |
81 | 81 | " AzureChatCompletion(\n", |
82 | 82 | " model,\n", |
83 | 83 | " endpoint = endpoint,\n", |
|
100 | 100 | "# Now you can import the plugin directly from the class you declared\n", |
101 | 101 | "# in the plugin directory. The add_plugin call does not need the path, it only needs an\n", |
102 | 102 | "# instance of the plugin and the name of the plugin\n", |
103 | | - "extractaudio_plugin = kernel.import_skill(AudioVideo(), skill_name=\"AudioVideoPlugin\")\n", |
| 103 | + "extractaudio_plugin = kernel.add_plugin(AudioVideo(), \"AudioVideoPlugin\")\n", |
104 | 104 | "\n", |
105 | 105 | "plugins_directory = \"./plugins\"\n", |
106 | 106 | "\n", |
107 | 107 | "# Import the PublishingPlugin from the plugins directory.\n", |
108 | | - "publishing_plugin = kernel.import_semantic_skill_from_directory(\n", |
109 | | - " plugins_directory, \"PublishingPlugin\"\n", |
| 108 | + "publishing_plugin = kernel.add_plugin(\n", |
| 109 | + " parent_directory=plugins_directory, \n", |
| 110 | + " plugin_name=\"PublishingPlugin\"\n", |
110 | 111 | ")\n" |
111 | 112 | ] |
112 | 113 | }, |
113 | 114 | { |
114 | 115 | "cell_type": "code", |
115 | 116 | "execution_count": null, |
116 | 117 | "metadata": {}, |
117 | | - "outputs": [], |
| 118 | + "outputs": [ |
| 119 | + { |
| 120 | + "data": { |
| 121 | + "text/plain": [ |
| 122 | + "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='ExtractAudio', plugin_name='AudioVideoPlugin', description='extract audio in wav format from an mp4 file', parameters=[KernelParameterMetadata(name='videofile', description='Full path to the mp4 file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='output audio file path', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.extract_audio of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)" |
| 123 | + ] |
| 124 | + }, |
| 125 | + "execution_count": 5, |
| 126 | + "metadata": {}, |
| 127 | + "output_type": "execute_result" |
| 128 | + } |
| 129 | + ], |
| 130 | + "source": [ |
| 131 | + "extractaudio_plugin[\"ExtractAudio\"] # This is how you can reference the plugin function" |
| 132 | + ] |
| 133 | + }, |
| 134 | + { |
| 135 | + "cell_type": "code", |
| 136 | + "execution_count": 6, |
| 137 | + "metadata": {}, |
| 138 | + "outputs": [ |
| 139 | + { |
| 140 | + "name": "stdout", |
| 141 | + "output_type": "stream", |
| 142 | + "text": [ |
| 143 | + "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='ExtractAudio', plugin_name='AudioVideoPlugin', description='extract audio in wav format from an mp4 file', parameters=[KernelParameterMetadata(name='videofile', description='Full path to the mp4 file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='output audio file path', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.extract_audio of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)\n", |
| 144 | + "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='TranscriptTimeline', plugin_name='AudioVideoPlugin', description='Transcript audio from a wav file to a timeline', parameters=[KernelParameterMetadata(name='audiofile', description='Full path to the wav file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.transcript_timeline of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)\n", |
| 145 | + "KernelFunctionFromPrompt(metadata=KernelFunctionMetadata(name='VideoTimelineCreator', plugin_name='PublishingPlugin', description='Given a video transcript it can summarize and generate a timeline', parameters=[KernelParameterMetadata(name='input', description='', default_value='', type_='', is_required=True, type_object=None)], is_prompt=True, is_asynchronous=True, return_parameter=KernelParameterMetadata(name='return', description='The completion result', default_value=None, type_='FunctionResult', is_required=True, type_object=None)), prompt_template=KernelPromptTemplate(prompt_template_config=PromptTemplateConfig(name='VideoTimelineCreator', description='Given a video transcript it can summarize and generate a timeline', template='I will give you a transcript of a video. The transcript contains phrases prefixed by the timestamp where the phrase starts. I want you to identify between three and ten main sections of the video. You must never identify more than ten sections.\\nFor each section you will create a brief title prefixed with the start timestamp of the section obtained analyzing all the text belonging to that section.\\n\\nEXAMPLE ANSWER - Maximum of ten sections\\n00:00 - Title of section 1\\n00:33 - Title of section 2\\n01:23 - Title of section 3\\n\\n[DATA]\\n{{$input}}', template_format='semantic-kernel', input_variables=[InputVariable(name='input', description='', default='', is_required=True, json_schema='')], execution_settings={})), prompt_execution_settings={})\n" |
| 146 | + ] |
| 147 | + } |
| 148 | + ], |
118 | 149 | "source": [ |
119 | 150 | "from pprint import pprint\n", |
120 | 151 | "# want to print all the keys of extractaudio_plugin that is a dictionary\n", |
|
126 | 157 | }, |
127 | 158 | { |
128 | 159 | "cell_type": "code", |
129 | | - "execution_count": null, |
| 160 | + "execution_count": 7, |
130 | 161 | "metadata": {}, |
131 | | - "outputs": [], |
| 162 | + "outputs": [ |
| 163 | + { |
| 164 | + "data": { |
| 165 | + "text/plain": [ |
| 166 | + "True" |
| 167 | + ] |
| 168 | + }, |
| 169 | + "execution_count": 7, |
| 170 | + "metadata": {}, |
| 171 | + "output_type": "execute_result" |
| 172 | + } |
| 173 | + ], |
132 | 174 | "source": [ |
133 | 175 | "# you can verify if cuda is available.\n", |
134 | 176 | "import torch\n", |
|
137 | 179 | }, |
138 | 180 | { |
139 | 181 | "cell_type": "code", |
140 | | - "execution_count": null, |
| 182 | + "execution_count": 8, |
141 | 183 | "metadata": {}, |
142 | | - "outputs": [], |
| 184 | + "outputs": [ |
| 185 | + { |
| 186 | + "name": "stdout", |
| 187 | + "output_type": "stream", |
| 188 | + "text": [ |
| 189 | + "Extracting auio file from video S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.mp4\n", |
| 190 | + "S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.wav\n" |
| 191 | + ] |
| 192 | + } |
| 193 | + ], |
143 | 194 | "source": [ |
144 | | - "# Now we can use the skill\n", |
145 | | - "# Run the Sqrt function with the context.\n", |
146 | | - "result = await kernel.run_async(\n", |
| 195 | + "import time\n", |
| 196 | + "\n", |
| 197 | + "result = await kernel.invoke(\n", |
147 | 198 | " extractaudio_plugin[\"ExtractAudio\"],\n", |
| 199 | + " #videofile =\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\250-NlpPrecisionRecallRerank.mp4\"\n", |
| 200 | + " videofile =\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\010-CsharpIntro.mp4\"\n", |
| 201 | + ")\n", |
| 202 | + "\n", |
| 203 | + "print (result)\n", |
| 204 | + "\n", |
| 205 | + "time.sleep(1)\n" |
| 206 | + ] |
| 207 | + }, |
| 208 | + { |
| 209 | + "cell_type": "code", |
| 210 | + "execution_count": 9, |
| 211 | + "metadata": {}, |
| 212 | + "outputs": [ |
| 213 | + { |
| 214 | + "name": "stdout", |
| 215 | + "output_type": "stream", |
| 216 | + "text": [ |
| 217 | + "Extracting transcript from audio file S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.wav\n", |
| 218 | + "Using device: cuda:0 to run whisper with model large-v3\n", |
| 219 | + "Detected language: English\n" |
| 220 | + ] |
| 221 | + }, |
| 222 | + { |
| 223 | + "name": "stderr", |
| 224 | + "output_type": "stream", |
| 225 | + "text": [ |
| 226 | + " 18%|█▊ | 8598/46811 [08:58<47:41, 13.36frames/s]" |
| 227 | + ] |
| 228 | + } |
| 229 | + ], |
| 230 | + "source": [ |
| 231 | + "\n", |
| 232 | + "# now invoke the plugin to transcribe the audio\n", |
| 233 | + "audio_transcript = await kernel.invoke(\n", |
148 | 234 | " extractaudio_plugin[\"TranscriptTimeline\"],\n", |
149 | | - " publishing_plugin[\"VideoTimelineCreator\"],\n", |
150 | | - " input_str=\"\"\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\200-intro-kernel-memory.mp4\"\"\"\n", |
151 | | - ")\n" |
| 235 | + " audiofile = result\n", |
| 236 | + ")\n", |
| 237 | + "\n", |
| 238 | + "print (audio_transcript)\n" |
152 | 239 | ] |
153 | 240 | }, |
154 | 241 | { |
|
157 | 244 | "metadata": {}, |
158 | 245 | "outputs": [], |
159 | 246 | "source": [ |
160 | | - "print(result)" |
| 247 | + "\n", |
| 248 | + "# now invoke the plugin to create the video timeline\n", |
| 249 | + "video_summarization = await kernel.invoke(\n", |
| 250 | + " publishing_plugin[\"VideoTimelineCreator\"],\n", |
| 251 | + " input = audio_transcript\n", |
| 252 | + ")\n", |
| 253 | + "print (video_summarization)" |
161 | 254 | ] |
162 | 255 | } |
163 | 256 | ], |
|
0 commit comments