Skip to content

Commit d5cbfea

Browse files
committed
feat: enable video uploads for Gemini 2.5 Pro models
- Add supportsVideo property to ModelInfo schema
- Mark Gemini 2.5 Flash and Pro models as supporting video
- Update ChatTextArea to accept video files (mp4, mov, avi, webm) when model supports video
- Pass supportsVideo prop from ChatView to ChatTextArea component

Fixes #6144
1 parent d48be23 commit d5cbfea

File tree

4 files changed

+34
-15
lines changed

4 files changed

+34
-15
lines changed

packages/types/src/model.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export const modelInfoSchema = z.object({
3232
maxThinkingTokens: z.number().nullish(),
3333
contextWindow: z.number(),
3434
supportsImages: z.boolean().optional(),
35+
supportsVideo: z.boolean().optional(),
3536
supportsComputerUse: z.boolean().optional(),
3637
supportsPromptCache: z.boolean(),
3738
supportsReasoningBudget: z.boolean().optional(),

packages/types/src/providers/gemini.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ export const geminiModels = {
5252
maxTokens: 64_000,
5353
contextWindow: 1_048_576,
5454
supportsImages: true,
55+
supportsVideo: true,
5556
supportsPromptCache: true,
5657
inputPrice: 0.3,
5758
outputPrice: 2.5,
@@ -64,6 +65,7 @@ export const geminiModels = {
6465
maxTokens: 65_535,
6566
contextWindow: 1_048_576,
6667
supportsImages: true,
68+
supportsVideo: true,
6769
supportsPromptCache: false,
6870
inputPrice: 0,
6971
outputPrice: 0,
@@ -72,6 +74,7 @@ export const geminiModels = {
7274
maxTokens: 65_535,
7375
contextWindow: 1_048_576,
7476
supportsImages: true,
77+
supportsVideo: true,
7578
supportsPromptCache: true,
7679
inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
7780
outputPrice: 15,
@@ -96,6 +99,7 @@ export const geminiModels = {
9699
maxTokens: 65_535,
97100
contextWindow: 1_048_576,
98101
supportsImages: true,
102+
supportsVideo: true,
99103
supportsPromptCache: true,
100104
inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
101105
outputPrice: 15,
@@ -120,6 +124,7 @@ export const geminiModels = {
120124
maxTokens: 65_535,
121125
contextWindow: 1_048_576,
122126
supportsImages: true,
127+
supportsVideo: true,
123128
supportsPromptCache: true,
124129
inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
125130
outputPrice: 15,
@@ -146,6 +151,7 @@ export const geminiModels = {
146151
maxTokens: 64_000,
147152
contextWindow: 1_048_576,
148153
supportsImages: true,
154+
supportsVideo: true,
149155
supportsPromptCache: true,
150156
inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
151157
outputPrice: 15,

webview-ui/src/components/chat/ChatTextArea.tsx

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ interface ChatTextAreaProps {
4242
onSend: () => void
4343
onSelectImages: () => void
4444
shouldDisableImages: boolean
45+
supportsVideo?: boolean
4546
onHeightChange?: (height: number) => void
4647
mode: Mode
4748
setMode: (value: Mode) => void
@@ -64,6 +65,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
6465
onSend,
6566
onSelectImages,
6667
shouldDisableImages,
68+
supportsVideo = false,
6769
onHeightChange,
6870
mode,
6971
setMode,
@@ -598,17 +600,21 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
598600
return
599601
}
600602

601-
const acceptedTypes = ["png", "jpeg", "webp"]
603+
const acceptedImageTypes = ["png", "jpeg", "webp"]
604+
const acceptedVideoTypes = supportsVideo ? ["mp4", "mov", "avi", "webm"] : []
602605

603-
const imageItems = Array.from(items).filter((item) => {
606+
const mediaItems = Array.from(items).filter((item) => {
604607
const [type, subtype] = item.type.split("/")
605-
return type === "image" && acceptedTypes.includes(subtype)
608+
return (
609+
(type === "image" && acceptedImageTypes.includes(subtype)) ||
610+
(type === "video" && acceptedVideoTypes.includes(subtype))
611+
)
606612
})
607613

608-
if (!shouldDisableImages && imageItems.length > 0) {
614+
if (!shouldDisableImages && mediaItems.length > 0) {
609615
e.preventDefault()
610616

611-
const imagePromises = imageItems.map((item) => {
617+
const mediaPromises = mediaItems.map((item) => {
612618
return new Promise<string | null>((resolve) => {
613619
const blob = item.getAsFile()
614620

@@ -633,8 +639,8 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
633639
})
634640
})
635641

636-
const imageDataArray = await Promise.all(imagePromises)
637-
const dataUrls = imageDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null)
642+
const mediaDataArray = await Promise.all(mediaPromises)
643+
const dataUrls = mediaDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null)
638644

639645
if (dataUrls.length > 0) {
640646
setSelectedImages((prevImages) => [...prevImages, ...dataUrls].slice(0, MAX_IMAGES_PER_MESSAGE))
@@ -643,7 +649,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
643649
}
644650
}
645651
},
646-
[shouldDisableImages, setSelectedImages, cursorPosition, setInputValue, inputValue, t],
652+
[shouldDisableImages, setSelectedImages, cursorPosition, setInputValue, inputValue, t, supportsVideo],
647653
)
648654

649655
const handleMenuMouseDown = useCallback(() => {
@@ -732,15 +738,19 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
732738
const files = Array.from(e.dataTransfer.files)
733739

734740
if (files.length > 0) {
735-
const acceptedTypes = ["png", "jpeg", "webp"]
741+
const acceptedImageTypes = ["png", "jpeg", "webp"]
742+
const acceptedVideoTypes = supportsVideo ? ["mp4", "mov", "avi", "webm"] : []
736743

737-
const imageFiles = files.filter((file) => {
744+
const mediaFiles = files.filter((file) => {
738745
const [type, subtype] = file.type.split("/")
739-
return type === "image" && acceptedTypes.includes(subtype)
746+
return (
747+
(type === "image" && acceptedImageTypes.includes(subtype)) ||
748+
(type === "video" && acceptedVideoTypes.includes(subtype))
749+
)
740750
})
741751

742-
if (!shouldDisableImages && imageFiles.length > 0) {
743-
const imagePromises = imageFiles.map((file) => {
752+
if (!shouldDisableImages && mediaFiles.length > 0) {
753+
const mediaPromises = mediaFiles.map((file) => {
744754
return new Promise<string | null>((resolve) => {
745755
const reader = new FileReader()
746756

@@ -758,8 +768,8 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
758768
})
759769
})
760770

761-
const imageDataArray = await Promise.all(imagePromises)
762-
const dataUrls = imageDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null)
771+
const mediaDataArray = await Promise.all(mediaPromises)
772+
const dataUrls = mediaDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null)
763773

764774
if (dataUrls.length > 0) {
765775
setSelectedImages((prevImages) =>
@@ -785,6 +795,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
785795
shouldDisableImages,
786796
setSelectedImages,
787797
t,
798+
supportsVideo,
788799
],
789800
)
790801

webview-ui/src/components/chat/ChatView.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,6 +1848,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
18481848
onSend={() => handleSendMessage(inputValue, selectedImages)}
18491849
onSelectImages={selectImages}
18501850
shouldDisableImages={shouldDisableImages}
1851+
supportsVideo={model?.supportsVideo || false}
18511852
onHeightChange={() => {
18521853
if (isAtBottom) {
18531854
scrollToBottomAuto()

0 commit comments

Comments
 (0)