Skip to content

Commit 4335601

Browse files
committed
refactor: Extract MIME type detection into separate module
Changes:
- Create new mime_detection.rs module for better code organization
- Move detect_mime_type() function and tests to dedicated module
- Update sql_to_json.rs to import from mime_detection module
- Remove unused import from functions.rs
- Maintain same functionality with improved code structure

Benefits:
- Better separation of concerns
- Improved code organization and maintainability
- Easier to extend MIME detection in the future
- Cleaner module boundaries

File changes:
- NEW: src/webserver/database/mime_detection.rs (MIME detection + tests)
- MOD: src/webserver/database/mod.rs (add mime_detection module)
- MOD: src/webserver/database/sql_to_json.rs (use mime_detection module)
- MOD: src/webserver/database/sqlpage_functions/functions.rs (remove unused import)
1 parent 36e5ce5 commit 4335601

File tree

3 files changed

+114
-120
lines changed

3 files changed

+114
-120
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/// Detects a MIME type from the leading bytes of a buffer (file signatures / magic bytes).
///
/// Recognized binary signatures: PNG, JPEG, GIF, BMP, WebP, PDF and ZIP. A ZIP
/// buffer is refined to one of the Office Open XML types when the start of its
/// first entry name mentions `word/`, `xl/` or `ppt/`. When no binary signature
/// matches, a first-byte heuristic is applied: `<` yields SVG (if the buffer
/// starts with `<svg`) or generic XML, and `{` / `[` yield JSON.
///
/// Returns `"application/octet-stream"` for empty or unrecognized input.
pub fn detect_mime_type(bytes: &[u8]) -> &'static str {
    // PNG: 89 50 4E 47 0D 0A 1A 0A
    if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
        return "image/png";
    }
    // JPEG: FF D8
    if bytes.starts_with(b"\xFF\xD8") {
        return "image/jpeg";
    }
    // GIF: "GIF87a" or "GIF89a"
    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        return "image/gif";
    }
    // BMP: 42 4D
    if bytes.starts_with(b"BM") {
        return "image/bmp";
    }
    // WebP: "RIFF" <4 size bytes> "WEBP"; `get` keeps short buffers from panicking.
    if bytes.starts_with(b"RIFF") && bytes.get(8..12) == Some(&b"WEBP"[..]) {
        return "image/webp";
    }
    // PDF: "%PDF"
    if bytes.starts_with(b"%PDF") {
        return "application/pdf";
    }
    // ZIP: 50 4B 03 04
    if bytes.starts_with(b"PK\x03\x04") {
        return zip_mime_type(bytes);
    }

    // Text-based formats: decide on the first byte only.
    match bytes.first().copied() {
        Some(b'<') if bytes.starts_with(b"<svg") => "image/svg+xml",
        // Any other markup, including an `<?xml` prolog, is reported as generic XML.
        Some(b'<') => "application/xml",
        Some(b'{') | Some(b'[') => "application/json",
        _ => "application/octet-stream",
    }
}

/// Refines a buffer that starts with the ZIP local-file-header signature.
///
/// Bytes 30..50 are inspected: the fixed part of a local file header is 30
/// bytes long, so this range is the beginning of the first entry's file name.
/// Buffers shorter than 50 bytes are reported as plain ZIP.
fn zip_mime_type(bytes: &[u8]) -> &'static str {
    if let Some(name_area) = bytes.get(30..50) {
        if contains_subslice(name_area, b"word/") {
            return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        }
        if contains_subslice(name_area, b"xl/") {
            return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
        }
        if contains_subslice(name_area, b"ppt/") {
            return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
        }
    }
    "application/zip"
}

/// Returns `true` when `needle` occurs anywhere inside `haystack`.
///
/// The window length is taken from the needle itself so the two can never get
/// out of sync; a window longer than the needle can never compare equal.
fn contains_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    // `windows(0)` panics, so an empty needle is rejected up front.
    !needle.is_empty() && haystack.windows(needle.len()).any(|window| window == needle)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a ZIP-like prefix: signature, zero-filled header up to byte 30,
    /// then `name`, zero-padded to at least 50 bytes.
    fn zip_with_first_entry(name: &[u8]) -> Vec<u8> {
        let mut data = b"PK\x03\x04".to_vec();
        data.resize(30, 0);
        data.extend_from_slice(name);
        let padded_len = data.len().max(50);
        data.resize(padded_len, 0);
        data
    }

    #[test]
    fn test_detect_mime_type() {
        // Test empty data
        assert_eq!(detect_mime_type(&[]), "application/octet-stream");

        // Test PNG
        assert_eq!(detect_mime_type(b"\x89PNG\r\n\x1a\n"), "image/png");

        // Test JPEG
        assert_eq!(detect_mime_type(b"\xFF\xD8\xFF\xE0"), "image/jpeg");

        // Test GIF87a
        assert_eq!(detect_mime_type(b"GIF87a"), "image/gif");

        // Test GIF89a
        assert_eq!(detect_mime_type(b"GIF89a"), "image/gif");

        // Test BMP
        assert_eq!(detect_mime_type(b"BM\x00\x00"), "image/bmp");

        // Test PDF
        assert_eq!(detect_mime_type(b"%PDF-"), "application/pdf");

        // Test SVG
        assert_eq!(
            detect_mime_type(b"<svg xmlns=\"http://www.w3.org/2000/svg\">"),
            "image/svg+xml"
        );

        // Test XML (non-SVG)
        assert_eq!(
            detect_mime_type(b"<?xml version=\"1.0\"?><root><data>test</data></root>"),
            "application/xml"
        );

        // Test JSON
        assert_eq!(detect_mime_type(b"{\"key\": \"value\"}"), "application/json");

        // Test ZIP
        assert_eq!(detect_mime_type(b"PK\x03\x04"), "application/zip");

        // Test unknown data
        assert_eq!(
            detect_mime_type(&[0x00, 0x01, 0x02, 0x03]),
            "application/octet-stream"
        );
    }

    #[test]
    fn test_detect_webp_and_text_fallbacks() {
        assert_eq!(detect_mime_type(b"RIFF\x00\x00\x00\x00WEBPVP8 "), "image/webp");
        // A truncated RIFF header must not panic and is not WebP.
        assert_eq!(detect_mime_type(b"RIFF\x00\x00\x00\x00WEB"), "application/octet-stream");
        assert_eq!(detect_mime_type(b"[1, 2, 3]"), "application/json");
        assert_eq!(detect_mime_type(b"<root/>"), "application/xml");
    }

    #[test]
    fn test_detect_office_documents_in_zip() {
        assert_eq!(
            detect_mime_type(&zip_with_first_entry(b"word/document.xml")),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        );
        assert_eq!(
            detect_mime_type(&zip_with_first_entry(b"xl/workbook.xml")),
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        );
        assert_eq!(
            detect_mime_type(&zip_with_first_entry(b"ppt/presentation.xml")),
            "application/vnd.openxmlformats-officedocument.presentationml.presentation"
        );
        // An unrelated first entry stays a plain ZIP archive.
        assert_eq!(
            detect_mime_type(&zip_with_first_entry(b"readme.txt")),
            "application/zip"
        );
    }
}

src/webserver/database/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ mod connect;
22
mod csv_import;
33
pub mod execute_queries;
44
pub mod migrations;
5+
pub mod mime_detection;
56
mod sql;
67
mod sqlpage_functions;
78
mod syntax_tree;

src/webserver/database/sql_to_json.rs

Lines changed: 1 addition & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -118,77 +118,10 @@ pub fn row_to_string(row: &AnyRow) -> Option<String> {
118118
/// This function is used by both SQL type conversion and file reading functions.
119119
/// Automatically detects common file types based on magic bytes.
120120
pub fn vec_to_data_uri(bytes: &[u8]) -> String {
121-
let mime_type = detect_mime_type(bytes);
121+
let mime_type = crate::webserver::database::mime_detection::detect_mime_type(bytes);
122122
vec_to_data_uri_with_mime(bytes, mime_type)
123123
}
124124

125-
pub fn detect_mime_type(bytes: &[u8]) -> &'static str {
126-
if bytes.is_empty() {
127-
return "application/octet-stream";
128-
}
129-
130-
// PNG: 89 50 4E 47 0D 0A 1A 0A
131-
if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
132-
return "image/png";
133-
}
134-
// JPEG: FF D8
135-
if bytes.starts_with(b"\xFF\xD8") {
136-
return "image/jpeg";
137-
}
138-
// GIF87a/89a: GIF87a or GIF89a
139-
if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
140-
return "image/gif";
141-
}
142-
// BMP: 42 4D
143-
if bytes.starts_with(b"BM") {
144-
return "image/bmp";
145-
}
146-
// WebP: RIFF....WEBP
147-
if bytes.starts_with(b"RIFF") && bytes.len() >= 12 && &bytes[8..12] == b"WEBP" {
148-
return "image/webp";
149-
}
150-
// PDF: %PDF
151-
if bytes.starts_with(b"%PDF") {
152-
return "application/pdf";
153-
}
154-
// ZIP: 50 4B 03 04
155-
if bytes.starts_with(b"PK\x03\x04") {
156-
// Check for Office document types in ZIP central directory
157-
if bytes.len() >= 50 {
158-
let central_dir = &bytes[30..bytes.len().min(50)];
159-
if central_dir.windows(6).any(|w| w == b"word/") {
160-
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
161-
}
162-
if central_dir.windows(3).any(|w| w == b"xl/") {
163-
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
164-
}
165-
if central_dir.windows(4).any(|w| w == b"ppt/") {
166-
return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
167-
}
168-
}
169-
return "application/zip";
170-
}
171-
172-
// Text-based formats - check first few bytes for ASCII patterns
173-
if bytes.len() >= 1 {
174-
match bytes[0] {
175-
b'<' => {
176-
if bytes.len() >= 4 && bytes.starts_with(b"<svg") {
177-
return "image/svg+xml";
178-
}
179-
if bytes.len() >= 5 && bytes.starts_with(b"<?xml") {
180-
return "application/xml";
181-
}
182-
return "application/xml";
183-
}
184-
b'{' | b'[' => return "application/json",
185-
_ => {}
186-
}
187-
}
188-
189-
"application/octet-stream"
190-
}
191-
192125
/// Converts binary data to a data URL string with a specific MIME type.
193126
/// This function is used by both SQL type conversion and file reading functions.
194127
pub fn vec_to_data_uri_with_mime(bytes: &[u8], mime_type: &str) -> String {
@@ -563,59 +496,7 @@ mod tests {
563496
assert_eq!(result, "data:application/octet-stream;base64,AAEC//79");
564497
}
565498

566-
#[test]
567-
fn test_detect_mime_type() {
568-
// Test empty data
569-
assert_eq!(detect_mime_type(&[]), "application/octet-stream");
570-
571-
// Test PNG
572-
assert_eq!(detect_mime_type(b"\x89PNG\r\n\x1a\n"), "image/png");
573-
574-
// Test JPEG
575-
assert_eq!(detect_mime_type(b"\xFF\xD8\xFF\xE0"), "image/jpeg");
576-
577-
// Test GIF87a
578-
assert_eq!(detect_mime_type(b"GIF87a"), "image/gif");
579-
580-
// Test GIF89a
581-
assert_eq!(detect_mime_type(b"GIF89a"), "image/gif");
582-
583-
// Test BMP
584-
assert_eq!(detect_mime_type(b"BM\x00\x00"), "image/bmp");
585-
586-
// Test PDF
587-
assert_eq!(detect_mime_type(b"%PDF-"), "application/pdf");
588499

589-
// Test SVG
590-
assert_eq!(detect_mime_type(b"<svg xmlns=\"http://www.w3.org/2000/svg\">"), "image/svg+xml");
591-
592-
// Test XML (non-SVG)
593-
assert_eq!(detect_mime_type(b"<?xml version=\"1.0\"?><root><data>test</data></root>"), "application/xml");
594-
595-
// Test JSON
596-
assert_eq!(detect_mime_type(b"{\"key\": \"value\"}"), "application/json");
597-
598-
// Test ZIP
599-
assert_eq!(detect_mime_type(b"PK\x03\x04"), "application/zip");
600-
601-
// Test unknown data
602-
assert_eq!(detect_mime_type(&[0x00, 0x01, 0x02, 0x03]), "application/octet-stream");
603-
}
604-
605-
#[test]
606-
fn test_vec_to_data_uri_with_auto_detection() {
607-
// Test PNG auto-detection
608-
let result = vec_to_data_uri(b"\x89PNG\r\n\x1a\n\x00");
609-
assert!(result.starts_with("data:image/png;base64,"));
610-
611-
// Test JPEG auto-detection
612-
let result = vec_to_data_uri(b"\xFF\xD8\xFF\xE0\x00");
613-
assert!(result.starts_with("data:image/jpeg;base64,"));
614-
615-
// Test PDF auto-detection
616-
let result = vec_to_data_uri(b"%PDF-\x00");
617-
assert!(result.starts_with("data:application/pdf;base64,"));
618-
}
619500

620501
#[test]
621502
fn test_vec_to_data_uri_with_mime() {

0 commit comments

Comments
 (0)