
Commit f5fb390

Updated some of the python examples

1 parent 628930d

File tree

4 files changed: +207, -148 lines


01_basic/00_test_plugin.ipynb

Lines changed: 4 additions & 61 deletions
@@ -2,54 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Extracting auio file from video S:\\Downloads\\1.mp4\n",
-      "Extracting transcript from audio file S:\\Downloads\\1.wav\n",
-      "Using device: cuda:0 to run whisper with model large-v3\n",
-      "Detected language: Italian\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 737030/737030 [3:52:51<00:00, 52.75frames/s] "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "extracted 2162 audio segments\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    },
-    {
-     "ename": "UnicodeEncodeError",
-     "evalue": "'charmap' codec can't encode character '\\u1edb' in position 2725: character maps to <undefined>",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mUnicodeEncodeError\u001b[0m Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[4], line 8\u001b[0m\n\u001b[0;32m 6\u001b[0m file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mS:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mDownloads\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124m1.mp4\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 7\u001b[0m audio \u001b[38;5;241m=\u001b[39m av\u001b[38;5;241m.\u001b[39mextract_audio(file)\n\u001b[1;32m----> 8\u001b[0m \u001b[43mav\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtranscript_timeline\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlarge-v3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\develop\\github\\SemanticKernelPlayground\\01_basic\\plugins\\AudioVideoPlugin\\AudioVideo.py:85\u001b[0m, in \u001b[0;36mAudioVideo.transcript_timeline\u001b[1;34m(self, audiofile, model)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[38;5;66;03m# Write all raw_transcription_string to a file\u001b[39;00m\n\u001b[0;32m 84\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(text_path, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m---> 85\u001b[0m \u001b[43mfile\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraw_transcription_string\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m transcription_string\n",
-      "File \u001b[1;32m~\\anaconda3\\lib\\encodings\\cp1252.py:19\u001b[0m, in \u001b[0;36mIncrementalEncoder.encode\u001b[1;34m(self, input, final)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mencode\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m, final\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m---> 19\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcodecs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcharmap_encode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43mencoding_table\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n",
-      "\u001b[1;31mUnicodeEncodeError\u001b[0m: 'charmap' codec can't encode character '\\u1edb' in position 2725: character maps to <undefined>"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from plugins.AudioVideoPlugin.AudioVideo import AudioVideo\n",
     "\n",
@@ -63,21 +18,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'transcription_string' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[5], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m ( \u001b[43mtranscription_string\u001b[49m)\n",
-      "\u001b[1;31mNameError\u001b[0m: name 'transcription_string' is not defined"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print ( transcription_string)"
    ]
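
Note on the cleared error above: the removed traceback shows the failure at AudioVideo.py line 85, where open(text_path, 'w') falls back to Windows' default cp1252 codec and cannot encode '\u1edb'. A minimal sketch of the likely fix, reusing the variable names visible in that traceback (the fix itself is not part of this commit):

# Probable fix for the UnicodeEncodeError cleared above (not in this commit):
# force UTF-8 instead of the Windows default cp1252, which cannot represent
# characters such as '\u1edb'. text_path and raw_transcription_string are the
# names taken from the removed traceback.
with open(text_path, "w", encoding="utf-8") as file:
    file.write(raw_transcription_string)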

01_basic/03_chain_skill_call.ipynb

Lines changed: 111 additions & 18 deletions
@@ -68,16 +68,16 @@
     }
    ],
    "source": [
+    "from semantic_kernel import Kernel\n",
     "from semantic_kernel.connectors.ai.open_ai import (\n",
     "    AzureChatCompletion,\n",
     "    AzureTextCompletion,\n",
     ")\n",
     "\n",
     "model = os.getenv(\"AZURE_GPT4_MODEL\", \"gpt4\")\n",
     "endpoint = os.getenv(\"OPENAI_API_BASE\")\n",
-    "kernel = sk.Kernel(log=logger)\n",
-    "kernel.add_chat_service(\n",
-    "    \"chat_completion\",\n",
+    "kernel = Kernel(log=logger)\n",
+    "kernel.add_service(\n",
     "    AzureChatCompletion(\n",
     "        model,\n",
     "        endpoint = endpoint,\n",
@@ -100,21 +100,52 @@
     "# Now you can import the plugin importing skill directly from the function you declared\n",
     "# in the plugin directory. The import_skill does not need the path, it only need an\n",
     "# instance of the skill and the name of the skill\n",
-    "extractaudio_plugin = kernel.import_skill(AudioVideo(), skill_name=\"AudioVideoPlugin\")\n",
+    "extractaudio_plugin = kernel.add_plugin(AudioVideo(), \"AudioVideoPlugin\")\n",
     "\n",
     "plugins_directory = \"./plugins\"\n",
     "\n",
     "# Import the OrchestratorPlugin from the plugins directory.\n",
-    "publishing_plugin = kernel.import_semantic_skill_from_directory(\n",
-    "    plugins_directory, \"PublishingPlugin\"\n",
+    "publishing_plugin = kernel.add_plugin(\n",
+    "    parent_directory=plugins_directory,\n",
+    "    plugin_name=\"PublishingPlugin\"\n",
     ")\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='ExtractAudio', plugin_name='AudioVideoPlugin', description='extract audio in wav format from an mp4 file', parameters=[KernelParameterMetadata(name='videofile', description='Full path to the mp4 file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='output audio file path', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.extract_audio of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "extractaudio_plugin[\"ExtractAudio\"] #This is how you can call the plug"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='ExtractAudio', plugin_name='AudioVideoPlugin', description='extract audio in wav format from an mp4 file', parameters=[KernelParameterMetadata(name='videofile', description='Full path to the mp4 file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='output audio file path', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.extract_audio of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)\n",
+      "KernelFunctionFromMethod(metadata=KernelFunctionMetadata(name='TranscriptTimeline', plugin_name='AudioVideoPlugin', description='Transcript audio from a wav file to a timeline', parameters=[KernelParameterMetadata(name='audiofile', description='Full path to the wav file', default_value=None, type_='str', is_required=True, type_object=<class 'str'>)], is_prompt=False, is_asynchronous=False, return_parameter=KernelParameterMetadata(name='return', description='', default_value=None, type_='str', is_required=True, type_object=None)), method=<bound method AudioVideo.transcript_timeline of <plugins.AudioVideoPlugin.AudioVideo.AudioVideo object at 0x00000212EC39C250>>, stream_method=None)\n",
+      "KernelFunctionFromPrompt(metadata=KernelFunctionMetadata(name='VideoTimelineCreator', plugin_name='PublishingPlugin', description='Given a video transcript it can summarize and generate a timeline', parameters=[KernelParameterMetadata(name='input', description='', default_value='', type_='', is_required=True, type_object=None)], is_prompt=True, is_asynchronous=True, return_parameter=KernelParameterMetadata(name='return', description='The completion result', default_value=None, type_='FunctionResult', is_required=True, type_object=None)), prompt_template=KernelPromptTemplate(prompt_template_config=PromptTemplateConfig(name='VideoTimelineCreator', description='Given a video transcript it can summarize and generate a timeline', template='I will give you a transcript of a video. The transcript contains phrases prefixed by the timestamp where the phrase starts. I want you to identify between three and ten main sections of the video. You must never identify more than ten sections.\\nFor each section you will create a brief title prefixed with the start timestamp of the section obtained analyzing all the text belonging to that section.\\n\\nEXAMPLE ANSWER - Maximum of ten sections\\n00:00 - Title of section 1\\n00:33 - Title of section 2\\n01:23 - Title of section 3\\n\\n[DATA]\\n{{$input}}', template_format='semantic-kernel', input_variables=[InputVariable(name='input', description='', default='', is_required=True, json_schema='')], execution_settings={})), prompt_execution_settings={})\n"
+     ]
+    }
+   ],
    "source": [
     "from pprint import pprint\n",
     "# want to print all the keys of extractaudio_plugin that is a dictionary\n",
@@ -126,9 +157,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# you can verify if cuda is available.\n",
    "import torch\n",
@@ -137,18 +179,63 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Extracting auio file from video S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.mp4\n",
+      "S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.wav\n"
+     ]
+    }
+   ],
    "source": [
-    "# Now we can use the skill\n",
-    "# Run the Sqrt function with the context.\n",
-    "result = await kernel.run_async(\n",
+    "import time\n",
+    "\n",
+    "result = await kernel.invoke(\n",
     "    extractaudio_plugin[\"ExtractAudio\"],\n",
+    "    #videofile =\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\250-NlpPrecisionRecallRerank.mp4\"\n",
+    "    videofile =\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\010-CsharpIntro.mp4\"\n",
+    ")\n",
+    "\n",
+    "print (result)\n",
+    "\n",
+    "time.sleep(1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Extracting transcript from audio file S:\\OneDrive\\Youtube\\AI\\SemanticChain\\MontaggiCompleti\\010-CsharpIntro.wav\n",
+      "Using device: cuda:0 to run whisper with model large-v3\n",
+      "Detected language: English\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " 18%|█▊        | 8598/46811 [08:58<47:41, 13.36frames/s]"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "# now invoke the plugin to transcript\n",
+    "audio_transcript = await kernel.invoke(\n",
     "    extractaudio_plugin[\"TranscriptTimeline\"],\n",
-    "    publishing_plugin[\"VideoTimelineCreator\"],\n",
-    "    input_str=\"\"\"S:\\\\OneDrive\\\\Youtube\\\\AI\\\\SemanticChain\\\\MontaggiCompleti\\\\200-intro-kernel-memory.mp4\"\"\"\n",
-    ")\n"
+    "    audiofile = result\n",
+    ")\n",
+    "\n",
+    "print (audio_transcript)\n"
    ]
   },
   {
@@ -157,7 +244,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(result)"
+    "\n",
+    "# now invoke the plugin to create the video timeline\n",
+    "video_summarization = await kernel.invoke(\n",
+    "    publishing_plugin[\"VideoTimelineCreator\"],\n",
+    "    input = audio_transcript\n",
+    ")\n",
+    "print (video_summarization)"
    ]
   }
  ],
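
Taken together, this notebook's changes track the Semantic Kernel 1.x migration: import_skill and import_semantic_skill_from_directory become kernel.add_plugin, and the single kernel.run_async pipeline is split into one kernel.invoke call per step so intermediate results can be inspected. A condensed sketch of the migrated flow, assuming Azure OpenAI settings come from the environment variables shown in the diff plus an OPENAI_API_KEY, and using a placeholder video path (neither the key variable nor the path is taken from the repo):

# Sketch of the migrated flow (Semantic Kernel >= 1.0). The video path and the
# OPENAI_API_KEY variable are placeholder assumptions, not values from this commit.
import asyncio
import os

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from plugins.AudioVideoPlugin.AudioVideo import AudioVideo


async def main() -> None:
    kernel = Kernel()
    # Prompt functions such as VideoTimelineCreator need a chat service.
    kernel.add_service(
        AzureChatCompletion(
            deployment_name=os.getenv("AZURE_GPT4_MODEL", "gpt4"),
            endpoint=os.getenv("OPENAI_API_BASE"),
            api_key=os.getenv("OPENAI_API_KEY"),
        )
    )
    # add_plugin covers both native classes (the old import_skill) and
    # prompt-template directories (the old import_semantic_skill_from_directory).
    audio_plugin = kernel.add_plugin(AudioVideo(), "AudioVideoPlugin")
    publishing_plugin = kernel.add_plugin(
        parent_directory="./plugins", plugin_name="PublishingPlugin"
    )

    # One invoke per stage instead of one chained run_async call.
    audio = await kernel.invoke(
        audio_plugin["ExtractAudio"], videofile="C:\\videos\\demo.mp4"
    )
    transcript = await kernel.invoke(
        audio_plugin["TranscriptTimeline"], audiofile=str(audio)
    )
    timeline = await kernel.invoke(
        publishing_plugin["VideoTimelineCreator"], input=str(transcript)
    )
    print(timeline)


asyncio.run(main())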

01_basic/plugins/AudioVideoPlugin/AudioVideo.py

Lines changed: 11 additions & 8 deletions
@@ -1,17 +1,19 @@
 import subprocess
-from semantic_kernel.skill_definition import sk_function
+from typing import Annotated
+from semantic_kernel.functions import kernel_function
 import os
 import whisper
 import torch
 
 class AudioVideo:
 
-    @sk_function(
+    @kernel_function(
         description="extract audio in wav format from an mp4 file",
         name="ExtractAudio",
-        input_description="Full path to the mp4 file",
     )
-    def extract_audio(self, videofile: str) -> str:
+    def extract_audio(
+        self,
+        videofile: Annotated[str, "Full path to the mp4 file"]) -> Annotated[str, "output audio file path"]:
         """
         Extract audio from a video file and return the full path to the extracted file.
 
@@ -33,12 +35,13 @@ def extract_audio(self, videofile: str) -> str:
         # now ffmpeg has created the audio file, return the path to it
         return audio_path
 
-    @sk_function(
+    @kernel_function(
         description="Transcript audio from a wav file to a timeline",
         name="TranscriptTimeline",
-        input_description="Full path to the wav file",
     )
-    def transcript_timeline(self, audiofile: str) -> str:
+    def transcript_timeline(
+        self,
+        audiofile: Annotated[str, "Full path to the wav file"]) -> str:
 
         """
         Extract a transcript from an audio file and return a transcript file that
@@ -47,7 +50,7 @@ def transcript_timeline(self, audiofile: str) -> str:
         :param audiofile: Full path to the wav file
         :return: transcripted text with start and end time
         """
-        model: str = "medium.en" #"large-v3" #"medium.en"
+        model: str = "medium.en" ##"tiny.en" #"large-v3" #"medium.en"
         print(f"Extracting transcript from audio file {audiofile}")
         # model = whisper.load_model("medium.en")
 
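
The decorator change above is the same migration seen from the plugin side: @sk_function's input_description moves into typing.Annotated on the parameter itself, which is what populates the KernelParameterMetadata descriptions printed in the notebook earlier. A compact sketch of the new style; the ffmpeg command line is an assumption, since the actual extraction code sits outside this hunk:

# New-style Semantic Kernel plugin: @kernel_function plus Annotated replaces
# @sk_function with input_description. The ffmpeg invocation below is an
# assumed implementation detail; the repo's real command is not in this diff.
import subprocess
from typing import Annotated

from semantic_kernel.functions import kernel_function


class AudioVideoSketch:
    @kernel_function(
        description="extract audio in wav format from an mp4 file",
        name="ExtractAudio",
    )
    def extract_audio(
        self,
        videofile: Annotated[str, "Full path to the mp4 file"],
    ) -> Annotated[str, "output audio file path"]:
        audio_path = videofile.rsplit(".", 1)[0] + ".wav"
        # -vn drops the video stream; pcm_s16le writes a plain 16-bit wav.
        subprocess.run(
            ["ffmpeg", "-y", "-i", videofile, "-vn",
             "-acodec", "pcm_s16le", audio_path],
            check=True,
        )
        return audio_path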
