|
/// Detects a MIME type from the leading bytes (file signature / magic bytes) of `bytes`.
///
/// Signatures are tried in a fixed order and the first match wins:
/// PNG, JPEG, GIF, BMP, WebP, PDF, ZIP (refined to DOCX / XLSX / PPTX when possible),
/// XML, SVG, JSON.
///
/// Returns `"application/octet-stream"` when nothing matches, including for empty input.
///
/// # Limitations
///
/// * Text formats are matched with a plain prefix test: leading whitespace or a byte-order
///   mark prevents detection.
/// * The XML prolog check runs before the SVG check, so an SVG document that starts with
///   `<?xml` is reported as `application/xml`.
/// * JSON detection only looks at the first byte (`{` or `[`); the payload is not parsed.
/// * Office documents are recognised only when `word/`, `xl/` or `ppt/` occurs within the
///   20 bytes that follow the 30-byte fixed part of the first ZIP local file header, and
///   only when at least 50 bytes are available. Anything else that starts with the ZIP
///   signature is reported as `application/zip`.
#[must_use]
pub fn detect_mime_type(bytes: &[u8]) -> &'static str {
    // PNG: 89 50 4E 47 0D 0A 1A 0A
    if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
        return "image/png";
    }
    // JPEG: FF D8
    if bytes.starts_with(b"\xFF\xD8") {
        return "image/jpeg";
    }
    // GIF: "GIF87a" or "GIF89a"
    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        return "image/gif";
    }
    // BMP: 42 4D
    if bytes.starts_with(b"BM") {
        return "image/bmp";
    }
    // WebP: "RIFF", four bytes that are not inspected, then "WEBP".
    // `get` yields `None` for inputs shorter than 12 bytes, so no separate length check is needed.
    if bytes.starts_with(b"RIFF") && bytes.get(8..12) == Some(&b"WEBP"[..]) {
        return "image/webp";
    }
    // PDF: "%PDF"
    if bytes.starts_with(b"%PDF") {
        return "application/pdf";
    }
    // ZIP: 50 4B 03 04
    if bytes.starts_with(b"PK\x03\x04") {
        // The fixed part of a ZIP local file header is 30 bytes long; the first entry's
        // file name starts right after it. Office Open XML packages are told apart by the
        // directory prefix found in that region. `get` returns `None` when fewer than
        // 50 bytes are available, in which case the generic ZIP type is reported.
        if let Some(name_region) = bytes.get(30..50) {
            if contains_subslice(name_region, b"word/") {
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            }
            if contains_subslice(name_region, b"xl/") {
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            }
            if contains_subslice(name_region, b"ppt/") {
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            }
        }
        return "application/zip";
    }

    // Text-based formats, matched on their very first bytes only.
    if bytes.starts_with(b"<?xml") {
        return "application/xml";
    }
    if bytes.starts_with(b"<svg") || bytes.starts_with(b"<!DOCTYPE svg") {
        return "image/svg+xml";
    }
    if bytes.starts_with(b"{") || bytes.starts_with(b"[") {
        return "application/json";
    }

    "application/octet-stream"
}

/// Returns `true` when `needle` occurs as a contiguous run inside `haystack`.
///
/// The window size is taken from `needle` itself so the two can never disagree.
/// An empty `needle` is considered to be contained in every haystack.
fn contains_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    // `windows(0)` panics, so the empty needle is answered before calling it.
    needle.is_empty() || haystack.windows(needle.len()).any(|window| window == needle)
}
| 59 | + |
| 60 | +/// Converts binary data to a data URL string. |
| 61 | +/// This function is used by both SQL type conversion and file reading functions. |
| 62 | +/// Automatically detects common file types based on magic bytes. |
| 63 | +#[must_use] |
| 64 | +pub fn vec_to_data_uri(bytes: &[u8]) -> String { |
| 65 | + let mime_type = detect_mime_type(bytes); |
| 66 | + vec_to_data_uri_with_mime(bytes, mime_type) |
| 67 | +} |
| 68 | + |
| 69 | +/// Converts binary data to a data URL string with a specific MIME type. |
| 70 | +/// This function is used by both SQL type conversion and file reading functions. |
| 71 | +#[must_use] |
| 72 | +pub fn vec_to_data_uri_with_mime(bytes: &[u8], mime_type: &str) -> String { |
| 73 | + let mut data_url = format!("data:{mime_type};base64,"); |
| 74 | + base64::Engine::encode_string( |
| 75 | + &base64::engine::general_purpose::STANDARD, |
| 76 | + bytes, |
| 77 | + &mut data_url, |
| 78 | + ); |
| 79 | + data_url |
| 80 | +} |
| 81 | + |
| 82 | +/// Converts binary data to a data URL JSON value. |
| 83 | +/// This is a convenience function for SQL type conversion. |
| 84 | +#[must_use] |
| 85 | +pub fn vec_to_data_uri_value(bytes: &[u8]) -> serde_json::Value { |
| 86 | + serde_json::Value::String(vec_to_data_uri(bytes)) |
| 87 | +} |
| 88 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a ZIP-like buffer: the local file header signature, zero padding up to
    /// offset 30, then `name`, then zero padding up to 64 bytes so that the detector's
    /// 50-byte minimum for inspecting the entry name is met.
    ///
    /// The buffer is not a valid archive; it only carries what `detect_mime_type` reads.
    fn zip_with_first_entry(name: &[u8]) -> Vec<u8> {
        let mut buf = b"PK\x03\x04".to_vec();
        buf.resize(30, 0);
        buf.extend_from_slice(name);
        buf.resize(64, 0);
        buf
    }

    #[test]
    fn test_detect_mime_type() {
        // Empty data falls through to the generic binary type.
        assert_eq!(detect_mime_type(&[]), "application/octet-stream");

        // PNG
        assert_eq!(detect_mime_type(b"\x89PNG\r\n\x1a\n"), "image/png");

        // JPEG
        assert_eq!(detect_mime_type(b"\xFF\xD8\xFF\xE0"), "image/jpeg");

        // GIF87a
        assert_eq!(detect_mime_type(b"GIF87a"), "image/gif");

        // GIF89a
        assert_eq!(detect_mime_type(b"GIF89a"), "image/gif");

        // BMP
        assert_eq!(detect_mime_type(b"BM\x00\x00"), "image/bmp");

        // PDF
        assert_eq!(detect_mime_type(b"%PDF-"), "application/pdf");

        // SVG, with and without a DOCTYPE
        assert_eq!(
            detect_mime_type(b"<svg xmlns=\"http://www.w3.org/2000/svg\">"),
            "image/svg+xml"
        );
        assert_eq!(detect_mime_type(b"<!DOCTYPE svg>"), "image/svg+xml");

        // XML (non-SVG)
        assert_eq!(
            detect_mime_type(b"<?xml version=\"1.0\"?><root><data>test</data></root>"),
            "application/xml"
        );

        // JSON object and JSON array
        assert_eq!(
            detect_mime_type(b"{\"key\": \"value\"}"),
            "application/json"
        );
        assert_eq!(detect_mime_type(b"[1, 2, 3]"), "application/json");

        // ZIP
        assert_eq!(detect_mime_type(b"PK\x03\x04"), "application/zip");

        // Unknown data
        assert_eq!(
            detect_mime_type(&[0x00, 0x01, 0x02, 0x03]),
            "application/octet-stream"
        );
    }

    #[test]
    fn test_detect_mime_type_webp() {
        // "RIFF" + four uninspected bytes + "WEBP"
        assert_eq!(
            detect_mime_type(b"RIFF\x00\x00\x00\x00WEBP"),
            "image/webp"
        );

        // A RIFF container of another kind is not WebP.
        assert_eq!(
            detect_mime_type(b"RIFF\x00\x00\x00\x00WAVE"),
            "application/octet-stream"
        );

        // Too short to hold the "WEBP" tag: must not panic.
        assert_eq!(detect_mime_type(b"RIFF"), "application/octet-stream");
    }

    #[test]
    fn test_detect_mime_type_office_zip() {
        // Spreadsheet package
        assert_eq!(
            detect_mime_type(&zip_with_first_entry(b"xl/workbook.xml")),
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        );

        // Presentation package
        assert_eq!(
            detect_mime_type(&zip_with_first_entry(b"ppt/presentation.xml")),
            "application/vnd.openxmlformats-officedocument.presentationml.presentation"
        );

        // An ordinary archive stays a plain ZIP.
        assert_eq!(
            detect_mime_type(&zip_with_first_entry(b"readme.txt")),
            "application/zip"
        );
    }

    #[test]
    fn test_vec_to_data_uri() {
        // Empty bytes
        let result = vec_to_data_uri(&[]);
        assert_eq!(result, "data:application/octet-stream;base64,");

        // Simple text
        let result = vec_to_data_uri(b"Hello World");
        assert_eq!(
            result,
            "data:application/octet-stream;base64,SGVsbG8gV29ybGQ="
        );

        // Binary data
        let binary_data = [0, 1, 2, 255, 254, 253];
        let result = vec_to_data_uri(&binary_data);
        assert_eq!(result, "data:application/octet-stream;base64,AAEC//79");
    }

    #[test]
    fn test_vec_to_data_uri_with_mime() {
        // Custom MIME type
        let result = vec_to_data_uri_with_mime(b"Hello", "text/plain");
        assert_eq!(result, "data:text/plain;base64,SGVsbG8=");

        // Image MIME type
        let result = vec_to_data_uri_with_mime(&[255, 216, 255], "image/jpeg");
        assert_eq!(result, "data:image/jpeg;base64,/9j/");

        // Empty bytes with a custom MIME type
        let result = vec_to_data_uri_with_mime(&[], "application/json");
        assert_eq!(result, "data:application/json;base64,");
    }

    #[test]
    fn test_vec_to_data_uri_value() {
        // The result must be a JSON string holding the data URL.
        let result = vec_to_data_uri_value(b"test");
        match result {
            serde_json::Value::String(s) => {
                assert_eq!(s, "data:application/octet-stream;base64,dGVzdA==");
            }
            _ => panic!("Expected String value"),
        }
    }
}
0 commit comments