Skip to content

Commit 62304ab

Browse files
committed
add multi-modal-agent
1 parent cf2aefe commit 62304ab

File tree

4 files changed

+260
-66
lines changed

4 files changed

+260
-66
lines changed

multi_modal_ai_agent/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
*.py[cod]
2+
__pycache__/
3+
*.db
4+
.web
5+
assets/external/
35 Bytes
Binary file not shown.

multi_modal_ai_agent/multi_modal_agent/multi_modal_agent.py

Lines changed: 86 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import reflex as rx
2-
import google.generativeai as genai
3-
from phi.agent import Agent
4-
from phi.model.google import Gemini
5-
from phi.tools.duckduckgo import DuckDuckGo
2+
from google import genai
63
import time
74
import asyncio
5+
from typing import List
6+
import traceback
87

98

109
class State(rx.State):
@@ -16,9 +15,11 @@ class State(rx.State):
1615
video: str = ""
1716
question: str = ""
1817

18+
@rx.event
1919
async def handle_upload(self, files: list[rx.UploadFile]):
2020
"""Handle video file upload."""
2121
if not files:
22+
self.upload_status = "Please select a video file."
2223
return
2324

2425
try:
@@ -41,127 +42,146 @@ async def handle_upload(self, files: list[rx.UploadFile]):
4142

4243
@rx.event(background=True)
async def analyze_video(self):
    """Answer the user's question about the uploaded video with Gemini.

    Declared as a background event, so every state mutation happens inside
    an ``async with self`` block. The method validates that a question and a
    video are present, uploads the video through the ``google.genai`` client,
    waits until the uploaded file leaves the ``PROCESSING`` state, and then
    asks the model the user's question about that file.

    Results and errors are reported through ``self.result``;
    ``self.processing`` is True for the duration of the request.
    """
    if not self.question:
        async with self:
            self.result = "Please enter your question."
        return

    if not self.video:
        async with self:
            self.result = "Please upload a video first."
        return

    async with self:
        self.processing = True
        self.result = "Analyzing Video..."
        # Snapshot the inputs under the state lock so that edits made by the
        # user while the request is in flight cannot change what is sent.
        question = self.question
        video_path = str(self.video)
    yield
    await asyncio.sleep(1)

    try:
        # NOTE(review): presumably picks up the API key from the
        # environment -- confirm against the deployment configuration.
        client = genai.Client()

        # The client calls below are synchronous; run them in a worker
        # thread so the event loop keeps serving other requests meanwhile.
        video_file = await asyncio.to_thread(
            client.files.upload, file=video_path
        )
        while video_file.state == "PROCESSING":
            await asyncio.sleep(2)
            video_file = await asyncio.to_thread(
                client.files.get, name=video_file.name
            )

        # Do not send a file to the model if server-side processing failed.
        if video_file.state == "FAILED":
            raise RuntimeError("Video processing failed.")

        # Ask the user's actual question instead of a fixed prompt.
        response = await asyncio.to_thread(
            client.models.generate_content,
            model="gemini-2.0-flash",
            contents=[video_file, question],
        )

        async with self:
            # ``text`` may be empty; keep ``result`` a non-empty string so
            # the response panel still replaces the progress message.
            self.result = response.text or "The model returned no text response."
            self.processing = False

    except Exception as e:
        # Surface the full traceback in the server log; the user only sees
        # the short message below.
        traceback.print_exc()
        async with self:
            self.processing = False
            self.result = f"An error occurred: {str(e)}"
8488

85-
86-
color = "rgb(107,99,246)"
8789

88-
def index():
89-
return rx.container(
90-
rx.vstack(
91-
# Header section
92-
rx.heading("Multimodal AI Agent 🕵️‍♀️ 💬", size="8", mb="6"),
90+
# Tailwind classes shared by the two full-width primary action buttons.
_PRIMARY_BUTTON_CLASS = (
    "w-full bg-blue-600 text-white px-6 py-3 rounded-lg font-semibold "
    "hover:bg-blue-700 transition-colors mt-4"
)


def _header_section() -> rx.Component:
    """Build the gradient banner containing the page title."""
    return rx.el.div(
        rx.el.h1(
            "Multimodal AI Agent 🕵️‍♀️ 💬",
            class_name="text-5xl font-bold text-white mb-4",
        ),
        class_name="w-full p-12 bg-gradient-to-r from-blue-600 to-blue-800 rounded-lg shadow-lg mb-8 text-center",
    )


def _upload_section() -> rx.Component:
    """Build the upload card: drop zone, selected file name, button, status."""
    drop_zone = rx.upload(
        rx.el.div(
            rx.el.button(
                "Select a Video File",
                class_name="bg-white text-blue-600 px-6 py-3 rounded-lg font-semibold border-2 border-blue-600 hover:bg-blue-50 transition-colors",
            ),
            rx.el.p(
                "Drag and drop or click to select",
                class_name="text-gray-500 mt-2",
            ),
            class_name="text-center",
        ),
        # NOTE(review): Reflex's upload docs show ``accept`` as a mapping of
        # MIME type -> extensions; confirm this set form is honored.
        accept={".mp4", ".mov", ".avi"},
        max_files=1,
        class_name="border-2 border-dashed border-gray-300 rounded-lg p-8 bg-gray-50 hover:bg-gray-100 transition-colors",
        id="upload1",
    )

    # Show the name of the first selected file, or an empty placeholder.
    selected_name = rx.cond(
        rx.selected_files("upload1"),
        rx.el.p(
            rx.selected_files("upload1")[0],
            class_name="text-gray-600 mt-2",
        ),
        rx.el.p("", class_name="mt-2"),
    )

    upload_button = rx.el.button(
        "Upload",
        on_click=State.handle_upload(rx.upload_files(upload_id="upload1")),
        class_name=_PRIMARY_BUTTON_CLASS,
    )

    return rx.el.div(
        drop_zone,
        selected_name,
        upload_button,
        rx.el.p(
            State.upload_status,
            class_name="text-gray-600 mt-2",
        ),
        class_name="mb-8 p-6 bg-white rounded-lg shadow-lg",
    )


def _analysis_section() -> rx.Component:
    """Build the video player, question box, analyze button and result card.

    The whole section is rendered only once ``State.video_filename`` is set.
    """
    player = rx.el.div(
        rx.video(
            url=rx.get_upload_url(State.video_filename),
            controls=True,
            class_name="w-full rounded-lg shadow-lg",
        ),
        class_name="mb-6",
    )

    question_box = rx.el.textarea(
        placeholder="Ask any question related to the video - the AI Agent will analyze it",
        value=State.question,
        on_change=State.set_question,
        class_name="w-full p-4 border-2 border-gray-300 rounded-lg focus:border-blue-600 focus:ring-1 focus:ring-blue-600 h-32 resize-none",
    )

    analyze_button = rx.el.button(
        "Analyze & Research",
        on_click=State.analyze_video,
        # ``rx.el.button`` is a plain HTML button and has no ``loading``
        # prop; ``disabled`` is the attribute that actually blocks repeat
        # clicks while a request is running.
        disabled=State.processing,
        class_name=_PRIMARY_BUTTON_CLASS,
    )

    # The response card appears only when there is something to show.
    response_card = rx.cond(
        State.result != "",
        rx.el.div(
            rx.el.h2(
                "🤖 Agent Response",
                class_name="text-2xl font-bold text-gray-800 mb-4",
            ),
            rx.markdown(
                State.result,
                class_name="prose prose-blue max-w-none",
            ),
            class_name="mt-8 p-6 bg-white rounded-lg shadow-lg",
        ),
    )

    return rx.cond(
        State.video_filename != "",
        rx.el.div(
            player,
            question_box,
            analyze_button,
            response_card,
            class_name="space-y-6",
        ),
    )


def index() -> rx.Component:
    """Build the single page of the app.

    Returns:
        A full-height page wrapper around a centered column that stacks the
        header banner, the upload card and the video/analysis section.
    """
    return rx.el.div(
        rx.el.div(
            _header_section(),
            _upload_section(),
            _analysis_section(),
            class_name="max-w-3xl mx-auto px-4",
        ),
        class_name="min-h-screen bg-gray-50 py-12",
    )
166186

167187

0 commit comments

Comments
 (0)