Skip to content

Commit 2b09c53

Browse files
ilopezluna and Copilot authored
Multimodal support in cli (#269)
* feat: add multimodal support for image processing in CLI
* test: add tests for prompt cleaning with multiple image inputs
* Apply suggestion from @Copilot

  Co-authored-by: Copilot <[email protected]>
* Apply suggestion from @Copilot

  Co-authored-by: Copilot <[email protected]>
* feat: enhance image path extraction to support quoted paths and spaces

---------

Co-authored-by: Copilot <[email protected]>
1 parent 5eea054 commit 2b09c53

File tree

6 files changed

+677
-14
lines changed

6 files changed

+677
-14
lines changed

cmd/cli/commands/images.go

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package commands
2+
3+
import (
4+
"encoding/base64"
5+
"errors"
6+
"fmt"
7+
"io"
8+
"net/http"
9+
"os"
10+
"regexp"
11+
"slices"
12+
"strings"
13+
)
14+
15+
// MaxImageSizeBytes is the upper bound, in bytes, on the size of an image file
// that may be attached to a prompt. The limit is 100 MiB (100 * 1024 * 1024).
const MaxImageSizeBytes int64 = 100 << 20
17+
18+
// imagePathPattern matches image file paths (jpg, jpeg, png, webp; extension
// matched case-insensitively) embedded in free text. It is compiled once at
// package initialization instead of on every call to extractImagePaths.
//
// The pattern has three alternatives, tried in this order:
//  1. "([^"]+?\.(?i:jpg|jpeg|png|webp))" - a double-quoted path; capture
//     group 1 holds the path without the quotes.
//  2. '([^']+?\.(?i:jpg|jpeg|png|webp))' - a single-quoted path; capture
//     group 2 holds the path without the quotes.
//  3. (?:[a-zA-Z]:[/\\]|[/\\]|\./)[^\n"']*?\.(?i:jpg|jpeg|png|webp)\b - an
//     unquoted path starting with a drive prefix (C:\ or C:/), a slash or
//     backslash, or "./".
//     - [^\n"'] lets the path contain spaces but stops at newlines or quotes.
//     - The non-greedy *? stops at the first image extension.
//     - \b requires a word boundary right after the extension.
var imagePathPattern = regexp.MustCompile(`"([^"]+?\.(?i:jpg|jpeg|png|webp))"|'([^']+?\.(?i:jpg|jpeg|png|webp))'|(?:[a-zA-Z]:[/\\]|[/\\]|\./)[^\n"']*?\.(?i:jpg|jpeg|png|webp)\b`)

// extractImagePaths returns every image file path found in input, in order of
// appearance.
//
// Recognized forms include /path/to/file.jpg, ./image.png and
// C:\photos\pic.webp, as well as quoted paths containing spaces such as
// "/path/to/my file.jpg" or '/path/with spaces/image.png'. For quoted paths
// the surrounding quotes are not part of the returned string.
//
// The result is nil when input contains no match. Matching is purely textual:
// the returned paths are not checked against the file system.
func extractImagePaths(input string) []string {
	var paths []string
	for _, m := range imagePathPattern.FindAllStringSubmatch(input, -1) {
		// m[0] is the whole match, m[1] the double-quoted capture and m[2]
		// the single-quoted capture. When neither capture is set, the
		// unquoted alternative matched and m[0] is the path itself.
		switch {
		case m[1] != "":
			paths = append(paths, m[1])
		case m[2] != "":
			paths = append(paths, m[2])
		default:
			paths = append(paths, m[0])
		}
	}
	return paths
}
58+
59+
// normalizeFilePath removes shell-style backslash escapes from filePath.
//
// A backslash followed by one of the characters listed in escapable (space,
// parentheses, square brackets, curly braces, $, &, ;, single quote,
// backslash, *, ? and ~) is replaced by that character alone. Any other
// backslash is left untouched, so a sequence such as `\p` is preserved.
func normalizeFilePath(filePath string) string {
	// Every character that is unescaped when preceded by a backslash.
	const escapable = " ()[]{}$&;'\\*?~"

	// strings.NewReplacer takes a flat list of old/new pairs.
	pairs := make([]string, 0, 2*len(escapable))
	for _, ch := range escapable {
		literal := string(ch)
		pairs = append(pairs, "\\"+literal, literal)
	}

	unescape := strings.NewReplacer(pairs...)
	return unescape.Replace(filePath)
}
80+
81+
// encodeImageToDataURL reads an image file, validates it, and encodes it to a base64 data URL
82+
// Returns a data URL like: data:image/jpeg;base64,/9j/4AAQ...
83+
func encodeImageToDataURL(filePath string) (string, error) {
84+
file, err := os.Open(filePath)
85+
if err != nil {
86+
return "", err
87+
}
88+
defer file.Close()
89+
90+
// Read first 512 bytes to detect content type
91+
buf := make([]byte, 512)
92+
_, err = file.Read(buf)
93+
if err != nil {
94+
return "", err
95+
}
96+
97+
contentType := http.DetectContentType(buf)
98+
allowedTypes := []string{"image/jpeg", "image/png", "image/webp"}
99+
if !slices.Contains(allowedTypes, contentType) {
100+
return "", fmt.Errorf("invalid image type: %s", contentType)
101+
}
102+
103+
info, err := file.Stat()
104+
if err != nil {
105+
return "", err
106+
}
107+
108+
// Check if the file size exceeds the maximum limit
109+
if info.Size() > MaxImageSizeBytes {
110+
return "", fmt.Errorf("file size exceeds maximum limit (%d MB)", MaxImageSizeBytes/(1024*1024))
111+
}
112+
113+
// Read entire file
114+
buf = make([]byte, info.Size())
115+
_, err = file.Seek(0, 0)
116+
if err != nil {
117+
return "", err
118+
}
119+
120+
_, err = io.ReadFull(file, buf)
121+
if err != nil {
122+
return "", err
123+
}
124+
125+
// Encode to base64
126+
encoded := base64.StdEncoding.EncodeToString(buf)
127+
128+
// Create data URL
129+
dataURL := fmt.Sprintf("data:%s;base64,%s", contentType, encoded)
130+
131+
return dataURL, nil
132+
}
133+
134+
// processImagesInPrompt extracts images from the prompt, encodes them to data URLs,
135+
// and returns the cleaned prompt text and list of image data URLs
136+
func processImagesInPrompt(prompt string) (string, []string, error) {
137+
imagePaths := extractImagePaths(prompt)
138+
var imageDataURLs []string
139+
140+
for _, filePath := range imagePaths {
141+
nfp := normalizeFilePath(filePath)
142+
dataURL, err := encodeImageToDataURL(nfp)
143+
if errors.Is(err, os.ErrNotExist) {
144+
// Skip non-existent files (might be false positive from regex)
145+
continue
146+
} else if err != nil {
147+
return "", nil, fmt.Errorf("couldn't process image %q: %w", nfp, err)
148+
}
149+
150+
// Remove the image path from the prompt text
151+
prompt = strings.ReplaceAll(prompt, "'"+nfp+"'", "")
152+
prompt = strings.ReplaceAll(prompt, "'"+filePath+"'", "")
153+
prompt = strings.ReplaceAll(prompt, nfp, "")
154+
prompt = strings.ReplaceAll(prompt, filePath, "")
155+
156+
imageDataURLs = append(imageDataURLs, dataURL)
157+
}
158+
159+
return strings.TrimSpace(prompt), imageDataURLs, nil
160+
}

0 commit comments

Comments
 (0)