@@ -116,8 +116,100 @@ pub fn row_to_string(row: &AnyRow) -> Option<String> {
116116
117117/// Converts binary data to a data URL string.
118118/// This function is used by both SQL type conversion and file reading functions.
119+ /// Automatically detects common file types based on magic bytes.
119120pub fn vec_to_data_uri ( bytes : & [ u8 ] ) -> String {
120- vec_to_data_uri_with_mime ( bytes, "application/octet-stream" )
121+ let mime_type = detect_mime_type ( bytes) ;
122+ vec_to_data_uri_with_mime ( bytes, mime_type)
123+ }
124+
/// Detects a MIME type from file signatures (magic bytes) and, for a few
/// text formats, from the first characters of the content.
///
/// Checks run in a fixed order and the first match wins:
/// PNG, JPEG, GIF, BMP, WebP, PDF, ZIP (including DOCX/XLSX/PPTX), JSON,
/// SVG, generic XML. Anything else, including empty input, yields
/// `"application/octet-stream"`.
///
/// Limitations worth knowing about:
/// - BMP and JPEG are recognised from their first two bytes only, so the
///   match is loose.
/// - Office formats are recognised only when `word/`, `xl/` or `ppt/`
///   appears in bytes 30..50 of the archive; other ZIP-based documents are
///   reported as `"application/zip"`.
/// - JSON and XML detection is a heuristic on the first non-blank
///   character, not a validation of the document.
pub fn detect_mime_type(bytes: &[u8]) -> &'static str {
    const FALLBACK: &str = "application/octet-stream";
    // How many leading bytes are decoded when looking for an SVG root
    // element. Must leave room for an XML declaration and a DOCTYPE.
    const SVG_SNIFF_WINDOW: usize = 512;

    if bytes.is_empty() {
        return FALLBACK;
    }

    // PNG (Portable Network Graphics): fixed 8-byte signature.
    if bytes.starts_with(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) {
        return "image/png";
    }

    // JPEG: start-of-image marker.
    if bytes.starts_with(&[0xFF, 0xD8]) {
        return "image/jpeg";
    }

    // GIF: either of the two version headers.
    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        return "image/gif";
    }

    // BMP: "BM".
    if bytes.starts_with(b"BM") {
        return "image/bmp";
    }

    // WebP: RIFF container whose form type (bytes 8..12) is "WEBP".
    if bytes.len() >= 12 && bytes.starts_with(b"RIFF") && bytes[8..12] == *b"WEBP" {
        return "image/webp";
    }

    // PDF: "%PDF".
    if bytes.starts_with(b"%PDF") {
        return "application/pdf";
    }

    // ZIP local file header, "PK\x03\x04". Office Open XML documents are
    // ZIP archives, so peek at bytes 30..50 for a telltale directory name.
    if bytes.starts_with(&[0x50, 0x4B, 0x03, 0x04]) {
        if bytes.len() >= 50 {
            let content = String::from_utf8_lossy(&bytes[30..50]);
            if content.contains("word/") {
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            }
            if content.contains("xl/") {
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            }
            if content.contains("ppt/") {
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            }
        }
        return "application/zip";
    }

    // JSON (simple heuristic): first non-blank character within the first
    // 10 bytes opens an object or an array.
    if bytes.len() >= 2 {
        let head = String::from_utf8_lossy(&bytes[..bytes.len().min(10)]);
        let head = head.trim();
        if head.starts_with('{') || head.starts_with('[') {
            return "application/json";
        }
    }

    // SVG must be tested before generic XML. SVG files very commonly begin
    // with an XML declaration, comments or a DOCTYPE, so those are skipped
    // before looking for the `<svg` root element.
    if bytes.len() >= 5 {
        let head = String::from_utf8_lossy(&bytes[..bytes.len().min(SVG_SNIFF_WINDOW)]);
        if skip_xml_prolog(&head).starts_with("<svg") {
            return "image/svg+xml";
        }
    }

    // Generic XML: anything whose first non-blank character (within the
    // first 20 bytes) is `<`. This also covers the `<?xml` declaration.
    if bytes.len() >= 5 {
        let head = String::from_utf8_lossy(&bytes[..bytes.len().min(20)]);
        if head.trim_start().starts_with('<') {
            return "application/xml";
        }
    }

    FALLBACK
}

/// Returns what remains of `text` after skipping leading whitespace, XML
/// declarations / processing instructions (`<? ... ?>`), comments
/// (`<!-- ... -->`) and other `<! ... >` declarations such as a DOCTYPE.
///
/// If one of those constructs is not terminated inside `text`, the
/// remainder starting at that construct is returned unchanged. A DOCTYPE
/// with an internal subset containing `>` is not parsed; skipping stops at
/// its first `>`.
fn skip_xml_prolog(text: &str) -> &str {
    let mut rest = text.trim_start();
    loop {
        let terminator = if rest.starts_with("<?") {
            "?>"
        } else if rest.starts_with("<!--") {
            "-->"
        } else if rest.starts_with("<!") {
            ">"
        } else {
            return rest;
        };
        match rest.find(terminator) {
            Some(end) => rest = rest[end + terminator.len()..].trim_start(),
            None => return rest,
        }
    }
}
122214
123215/// Converts binary data to a data URL string with a specific MIME type.
@@ -494,6 +586,74 @@ mod tests {
494586 assert_eq ! ( result, "data:application/octet-stream;base64,AAEC//79" ) ;
495587 }
496588
#[test]
fn test_detect_mime_type() {
    /// Asserts that `input` is detected as `expected`.
    fn expect_mime(input: &[u8], expected: &str) {
        assert_eq!(detect_mime_type(input), expected, "input: {:?}", input);
    }

    /// Builds a minimal ZIP-looking buffer: the local-file-header magic,
    /// zero padding up to offset 30, then `entry_name`, padded to at least
    /// 50 bytes so the Office-format sniffing window is fully populated.
    fn zip_with_first_entry(entry_name: &[u8]) -> Vec<u8> {
        let mut archive = vec![0x50, 0x4B, 0x03, 0x04];
        archive.resize(30, 0);
        archive.extend_from_slice(entry_name);
        let padded_len = archive.len().max(50);
        archive.resize(padded_len, 0);
        archive
    }

    // Empty and unrecognised data fall back to the generic type.
    expect_mime(&[], "application/octet-stream");
    expect_mime(&[0x00, 0x01, 0x02, 0x03], "application/octet-stream");

    // Binary image formats.
    expect_mime(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], "image/png");
    expect_mime(&[0xFF, 0xD8, 0xFF, 0xE0], "image/jpeg");
    expect_mime(&[0x47, 0x49, 0x46, 0x38, 0x37, 0x61], "image/gif"); // GIF87a
    expect_mime(&[0x47, 0x49, 0x46, 0x38, 0x39, 0x61], "image/gif"); // GIF89a
    expect_mime(&[0x42, 0x4D, 0x00, 0x00], "image/bmp");
    expect_mime(b"RIFF\x00\x00\x00\x00WEBP", "image/webp");

    // A RIFF container that is not WebP must not be reported as WebP.
    expect_mime(b"RIFF\x00\x00\x00\x00WAVE", "application/octet-stream");

    // PDF.
    expect_mime(&[0x25, 0x50, 0x44, 0x46, 0x2D], "application/pdf");

    // Text-based formats.
    expect_mime(b"<svg xmlns=\"http://www.w3.org/2000/svg\">", "image/svg+xml");
    expect_mime(b"  \n<svg></svg>", "image/svg+xml");
    expect_mime(
        b"<?xml version=\"1.0\"?><root><data>test</data></root>",
        "application/xml",
    );
    expect_mime(b"{\"key\": \"value\"}", "application/json");
    expect_mime(b"[1, 2, 3]", "application/json");
    expect_mime(b"  {\"a\": 1}", "application/json");

    // Plain ZIP: bare header, and a full-size header with a neutral name.
    expect_mime(&[0x50, 0x4B, 0x03, 0x04], "application/zip");
    expect_mime(&zip_with_first_entry(b"mimetype"), "application/zip");

    // Office Open XML documents are ZIP archives with a telltale directory.
    expect_mime(
        &zip_with_first_entry(b"word/document.xml"),
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    );
    expect_mime(
        &zip_with_first_entry(b"xl/workbook.xml"),
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    );
    expect_mime(
        &zip_with_first_entry(b"ppt/presentation.xml"),
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    );
}
638+
#[test]
fn test_vec_to_data_uri_with_auto_detection() {
    /// Asserts that the data URL produced for `payload` begins with `prefix`.
    fn assert_uri_prefix(payload: &[u8], prefix: &str) {
        let uri = vec_to_data_uri(payload);
        assert!(uri.starts_with(prefix));
    }

    // PNG signature followed by one payload byte.
    assert_uri_prefix(
        &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00],
        "data:image/png;base64,",
    );

    // JPEG start-of-image marker followed by more bytes.
    assert_uri_prefix(&[0xFF, 0xD8, 0xFF, 0xE0, 0x00], "data:image/jpeg;base64,");

    // "%PDF-" followed by one payload byte.
    assert_uri_prefix(
        &[0x25, 0x50, 0x44, 0x46, 0x2D, 0x00],
        "data:application/pdf;base64,",
    );
}
656+
497657 #[ test]
498658 fn test_vec_to_data_uri_with_mime ( ) {
499659 // Test with custom MIME type
0 commit comments