diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 3bd66782cfe..5dac9d9df6c 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -32,6 +32,7 @@ export const modelInfoSchema = z.object({ maxThinkingTokens: z.number().nullish(), contextWindow: z.number(), supportsImages: z.boolean().optional(), + supportsVideo: z.boolean().optional(), supportsComputerUse: z.boolean().optional(), supportsPromptCache: z.boolean(), supportsReasoningBudget: z.boolean().optional(), diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts index a7225c7330f..866fef7d5fa 100644 --- a/packages/types/src/providers/gemini.ts +++ b/packages/types/src/providers/gemini.ts @@ -52,6 +52,7 @@ export const geminiModels = { maxTokens: 64_000, contextWindow: 1_048_576, supportsImages: true, + supportsVideo: true, supportsPromptCache: true, inputPrice: 0.3, outputPrice: 2.5, @@ -64,6 +65,7 @@ export const geminiModels = { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, + supportsVideo: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, @@ -72,6 +74,7 @@ export const geminiModels = { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, + supportsVideo: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, @@ -96,6 +99,7 @@ export const geminiModels = { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, + supportsVideo: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, @@ -120,6 +124,7 @@ export const geminiModels = { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, + supportsVideo: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. 
outputPrice: 15, @@ -146,6 +151,7 @@ export const geminiModels = { maxTokens: 64_000, contextWindow: 1_048_576, supportsImages: true, + supportsVideo: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, diff --git a/webview-ui/src/components/chat/ChatTextArea.tsx b/webview-ui/src/components/chat/ChatTextArea.tsx index 6c541353eb2..2932b1b3b9c 100644 --- a/webview-ui/src/components/chat/ChatTextArea.tsx +++ b/webview-ui/src/components/chat/ChatTextArea.tsx @@ -42,6 +42,7 @@ interface ChatTextAreaProps { onSend: () => void onSelectImages: () => void shouldDisableImages: boolean + supportsVideo?: boolean onHeightChange?: (height: number) => void mode: Mode setMode: (value: Mode) => void @@ -64,6 +65,7 @@ const ChatTextArea = forwardRef( onSend, onSelectImages, shouldDisableImages, + supportsVideo = false, onHeightChange, mode, setMode, @@ -598,17 +600,21 @@ const ChatTextArea = forwardRef( return } - const acceptedTypes = ["png", "jpeg", "webp"] + const acceptedImageTypes = ["png", "jpeg", "webp"] + const acceptedVideoTypes = supportsVideo ? 
["mp4", "mov", "quicktime", "avi", "x-msvideo", "webm"] : [] /* subtypes of the browser-reported MIME type: .mov arrives as video/quicktime, .avi as video/x-msvideo */ - const imageItems = Array.from(items).filter((item) => { + const mediaItems = Array.from(items).filter((item) => { const [type, subtype] = item.type.split("/") - return type === "image" && acceptedTypes.includes(subtype) + return ( + (type === "image" && acceptedImageTypes.includes(subtype)) || + (type === "video" && acceptedVideoTypes.includes(subtype)) + ) }) - if (!shouldDisableImages && imageItems.length > 0) { + if (!shouldDisableImages && mediaItems.length > 0) { e.preventDefault() - const imagePromises = imageItems.map((item) => { + const mediaPromises = mediaItems.map((item) => { return new Promise((resolve) => { const blob = item.getAsFile() @@ -633,8 +639,8 @@ const ChatTextArea = forwardRef( }) }) - const imageDataArray = await Promise.all(imagePromises) - const dataUrls = imageDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null) + const mediaDataArray = await Promise.all(mediaPromises) + const dataUrls = mediaDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null) if (dataUrls.length > 0) { setSelectedImages((prevImages) => [...prevImages, ...dataUrls].slice(0, MAX_IMAGES_PER_MESSAGE)) @@ -643,7 +649,7 @@ const ChatTextArea = forwardRef( } } }, - [shouldDisableImages, setSelectedImages, cursorPosition, setInputValue, inputValue, t], + [shouldDisableImages, setSelectedImages, cursorPosition, setInputValue, inputValue, t, supportsVideo], ) const handleMenuMouseDown = useCallback(() => { @@ -732,15 +738,19 @@ const ChatTextArea = forwardRef( const files = Array.from(e.dataTransfer.files) if (files.length > 0) { - const acceptedTypes = ["png", "jpeg", "webp"] + const acceptedImageTypes = ["png", "jpeg", "webp"] + const acceptedVideoTypes = supportsVideo ? 
["mp4", "mov", "quicktime", "avi", "x-msvideo", "webm"] : [] /* subtypes of the browser-reported MIME type: .mov arrives as video/quicktime, .avi as video/x-msvideo */ - const imageFiles = files.filter((file) => { + const mediaFiles = files.filter((file) => { const [type, subtype] = file.type.split("/") - return type === "image" && acceptedTypes.includes(subtype) + return ( + (type === "image" && acceptedImageTypes.includes(subtype)) || + (type === "video" && acceptedVideoTypes.includes(subtype)) + ) }) - if (!shouldDisableImages && imageFiles.length > 0) { - const imagePromises = imageFiles.map((file) => { + if (!shouldDisableImages && mediaFiles.length > 0) { + const mediaPromises = mediaFiles.map((file) => { return new Promise((resolve) => { const reader = new FileReader() @@ -758,8 +768,8 @@ const ChatTextArea = forwardRef( }) }) - const imageDataArray = await Promise.all(imagePromises) - const dataUrls = imageDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null) + const mediaDataArray = await Promise.all(mediaPromises) + const dataUrls = mediaDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null) if (dataUrls.length > 0) { setSelectedImages((prevImages) => @@ -785,6 +795,7 @@ const ChatTextArea = forwardRef( shouldDisableImages, setSelectedImages, t, + supportsVideo, ], ) diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index efd2db856c0..ab8b610eba2 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -1848,6 +1848,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction handleSendMessage(inputValue, selectedImages)} onSelectImages={selectImages} shouldDisableImages={shouldDisableImages} + supportsVideo={model?.supportsVideo || false} onHeightChange={() => { if (isAtBottom) { scrollToBottomAuto()