Skip to content

Commit 4a3a655

Browse files
committed
feat: Add smart MIME type detection for BLOB data
- Implement automatic MIME type detection based on file signatures (magic bytes)
- Support common file formats: PNG, JPEG, GIF, BMP, WebP, SVG, PDF, DOCX, XLSX, PPTX, JSON, XML, ZIP
- Automatic fallback to 'application/octet-stream' for unknown formats
- Update CHANGELOG.md with comprehensive feature description
- Add comprehensive tests for MIME type detection functionality
- BLOB data now automatically returns appropriate data URLs:
  * PNG files: 'data:image/png;base64,...'
  * PDF files: 'data:application/pdf;base64,...'
  * SVG files: 'data:image/svg+xml;base64,...'
  * Unknown files: 'data:application/octet-stream;base64,...'
- Improves user experience by providing correct MIME types for downloads and displays
- Eliminates need for manual MIME type specification in most cases
1 parent 6962411 commit 4a3a655

File tree

2 files changed

+165
-1
lines changed

2 files changed

+165
-1
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
- **MySQL/MariaDB**: supports `BLOB` columns
1717
- **MSSQL**: Extended support for `VARBINARY`, `BIGVARBINARY`, `BINARY`, and `IMAGE` columns
1818
- **SQLite**: Full support for `BLOB` columns
19+
- **Smart MIME Type Detection**: Automatic detection of common file types based on magic bytes:
20+
- **Images**: PNG, JPEG/JPG, GIF, BMP, WebP, SVG
21+
- **Documents**: PDF, DOCX, XLSX, PPTX
22+
- **Data**: JSON, XML, ZIP archives
1923

2024
## v0.36.1
2125
- Fix regression introduced in v0.36.0: PostgreSQL money values showed as 0.0

src/webserver/database/sql_to_json.rs

Lines changed: 161 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,100 @@ pub fn row_to_string(row: &AnyRow) -> Option<String> {
116116

117117
/// Converts binary data to a data URL string.
118118
/// This function is used by both SQL type conversion and file reading functions.
119+
/// Automatically detects common file types based on magic bytes.
119120
pub fn vec_to_data_uri(bytes: &[u8]) -> String {
120-
vec_to_data_uri_with_mime(bytes, "application/octet-stream")
121+
let mime_type = detect_mime_type(bytes);
122+
vec_to_data_uri_with_mime(bytes, mime_type)
123+
}
124+
125+
/// Detects the MIME type of `bytes` from its leading file signature (magic bytes).
///
/// Binary signatures are checked first, in this order: PNG, JPEG, GIF, BMP,
/// WebP, PDF, ZIP. ZIP archives are further inspected for Office Open XML
/// parts so that DOCX, XLSX and PPTX files get their specific MIME type.
/// If no binary signature matches, the start of the data is decoded lossily
/// as UTF-8 (an optional UTF-8 byte order mark is skipped) and sniffed as
/// JSON, then SVG, then XML.
///
/// These are heuristics, not validation: only the first bytes are looked at,
/// so e.g. any data starting with `BM` is reported as a bitmap and any text
/// starting with `<` that is not SVG is reported as XML.
///
/// Returns `"application/octet-stream"` when nothing matches, including for
/// empty input.
pub fn detect_mime_type(bytes: &[u8]) -> &'static str {
    const PNG_SIGNATURE: &[u8] = &[0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A];
    const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";
    // How much of the input is decoded when sniffing text formats. Large
    // enough to get past a typical XML declaration, comment and DOCTYPE.
    const TEXT_SNIFF_LEN: usize = 1024;

    if bytes.starts_with(PNG_SIGNATURE) {
        return "image/png";
    }
    if bytes.starts_with(&[0xFF, 0xD8]) {
        return "image/jpeg";
    }
    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        return "image/gif";
    }
    if bytes.starts_with(b"BM") {
        return "image/bmp";
    }
    // WebP is a RIFF container whose form type (bytes 8..12) is "WEBP".
    if bytes.starts_with(b"RIFF") && bytes.get(8..12) == Some(&b"WEBP"[..]) {
        return "image/webp";
    }
    if bytes.starts_with(b"%PDF") {
        return "application/pdf";
    }
    if bytes.starts_with(b"PK\x03\x04") {
        return ooxml_mime_type(bytes).unwrap_or("application/zip");
    }

    // Text-based formats. `trim_start` does not remove U+FEFF, so the BOM
    // has to be stripped explicitly before decoding.
    let text_bytes = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);
    let head = String::from_utf8_lossy(&text_bytes[..text_bytes.len().min(TEXT_SNIFF_LEN)]);
    let text = head.trim_start();

    if text_bytes.len() >= 2 && (text.starts_with('{') || text.starts_with('[')) {
        return "application/json";
    }
    if text_bytes.len() >= 5 {
        // SVG must be tested before the generic XML check, which accepts
        // anything starting with `<`.
        if root_element_is_svg(text) {
            return "image/svg+xml";
        }
        if text.starts_with('<') {
            return "application/xml";
        }
    }

    "application/octet-stream"
}

/// Looks for Office Open XML parts in the local file headers of a ZIP archive.
///
/// Scans the first few KiB of `bytes` for local file header signatures and
/// reads each entry name using the header's name-length field. The first
/// entry of a real OOXML package is usually `[Content_Types].xml`, so looking
/// only at the first entry name is not enough. Returns `None` when no entry
/// name starts with `word/`, `xl/` or `ppt/`.
fn ooxml_mime_type(bytes: &[u8]) -> Option<&'static str> {
    const SIGNATURE: &[u8] = b"PK\x03\x04";
    // Offsets inside a ZIP local file header.
    const NAME_LEN_OFFSET: usize = 26;
    const NAME_OFFSET: usize = 30;
    // Bound the scan so that large archives stay cheap to classify.
    const SCAN_LIMIT: usize = 8192;

    let scan_end = bytes.len().min(SCAN_LIMIT);
    bytes[..scan_end]
        .windows(SIGNATURE.len())
        .enumerate()
        .filter(|(_, window)| *window == SIGNATURE)
        .find_map(|(start, _)| {
            let header = &bytes[start..];
            let len_field = header.get(NAME_LEN_OFFSET..NAME_LEN_OFFSET + 2)?;
            let name_len = usize::from(u16::from_le_bytes([len_field[0], len_field[1]]));
            let name = header.get(NAME_OFFSET..NAME_OFFSET + name_len)?;
            if name.starts_with(b"word/") {
                Some("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
            } else if name.starts_with(b"xl/") {
                Some("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
            } else if name.starts_with(b"ppt/") {
                Some("application/vnd.openxmlformats-officedocument.presentationml.presentation")
            } else {
                None
            }
        })
}

/// Returns `true` when the first element of an XML-like text is `<svg`.
///
/// Skips leading whitespace, processing instructions (such as the
/// `<?xml ... ?>` declaration), comments and `<!...>` declarations (such as
/// DOCTYPE) before looking at the first tag. An unterminated construct
/// yields `false`.
fn root_element_is_svg(text: &str) -> bool {
    let mut rest = text.trim_start();
    loop {
        let (body, terminator) = if let Some(body) = rest.strip_prefix("<?") {
            (body, "?>")
        } else if let Some(body) = rest.strip_prefix("<!--") {
            (body, "-->")
        } else if let Some(body) = rest.strip_prefix("<!") {
            (body, ">")
        } else {
            return rest.starts_with("<svg");
        };
        match body.find(terminator) {
            Some(end) => rest = body[end + terminator.len()..].trim_start(),
            None => return false,
        }
    }
}
122214

123215
/// Converts binary data to a data URL string with a specific MIME type.
@@ -494,6 +586,74 @@ mod tests {
494586
assert_eq!(result, "data:application/octet-stream;base64,AAEC//79");
495587
}
496588

589+
#[test]
fn test_detect_mime_type() {
    // No data at all falls back to the generic binary type.
    assert_eq!(detect_mime_type(b""), "application/octet-stream");

    // Image signatures.
    assert_eq!(detect_mime_type(b"\x89PNG\r\n\x1a\n"), "image/png");
    assert_eq!(detect_mime_type(b"\xFF\xD8\xFF\xE0"), "image/jpeg");
    assert_eq!(detect_mime_type(b"GIF87a"), "image/gif");
    assert_eq!(detect_mime_type(b"GIF89a"), "image/gif");
    assert_eq!(detect_mime_type(b"BM\x00\x00"), "image/bmp");

    // Document signature.
    assert_eq!(detect_mime_type(b"%PDF-"), "application/pdf");

    // Text-based formats: SVG is expected to win over generic XML.
    assert_eq!(
        detect_mime_type(b"<svg xmlns=\"http://www.w3.org/2000/svg\">"),
        "image/svg+xml"
    );
    assert_eq!(
        detect_mime_type(b"<?xml version=\"1.0\"?><root><data>test</data></root>"),
        "application/xml"
    );
    assert_eq!(detect_mime_type(b"{\"key\": \"value\"}"), "application/json");

    // A bare ZIP local file header signature.
    assert_eq!(detect_mime_type(b"PK\x03\x04"), "application/zip");

    // Bytes matching no known signature.
    assert_eq!(detect_mime_type(b"\x00\x01\x02\x03"), "application/octet-stream");
}
638+
639+
#[test]
fn test_vec_to_data_uri_with_auto_detection() {
    // Each input is a known signature followed by one extra byte; the data
    // URL must carry the MIME type detected from that signature.

    // PNG
    assert!(vec_to_data_uri(b"\x89PNG\r\n\x1a\n\x00").starts_with("data:image/png;base64,"));

    // JPEG
    assert!(vec_to_data_uri(b"\xFF\xD8\xFF\xE0\x00").starts_with("data:image/jpeg;base64,"));

    // PDF
    assert!(vec_to_data_uri(b"%PDF-\x00").starts_with("data:application/pdf;base64,"));
}
656+
497657
#[test]
498658
fn test_vec_to_data_uri_with_mime() {
499659
// Test with custom MIME type

0 commit comments

Comments
 (0)