1+ import { TEXT_FILE_EXTENSION_SET } from 'clawdhub-schema'
12import { gunzipSync , unzipSync } from 'fflate'
23
34const TEXT_TYPES = new Map ( [
@@ -23,53 +24,77 @@ export async function expandFiles(selected: File[]) {
2324 const lower = file . name . toLowerCase ( )
2425 if ( lower . endsWith ( '.zip' ) ) {
2526 const entries = unzipSync ( new Uint8Array ( await readArrayBuffer ( file ) ) )
26- for ( const [ path , data ] of Object . entries ( entries ) ) {
27- if ( ! path || path . endsWith ( '/' ) ) continue
28- expanded . push (
29- new File ( [ data . buffer ] , normalizePath ( path ) , {
30- type : guessContentType ( path ) ,
31- } ) ,
32- )
33- }
27+ pushArchiveEntries (
28+ expanded ,
29+ Object . entries ( entries ) . map ( ( [ path , data ] ) => ( { path, data } ) ) ,
30+ )
3431 continue
3532 }
3633 if ( lower . endsWith ( '.tar.gz' ) || lower . endsWith ( '.tgz' ) ) {
3734 const unpacked = gunzipSync ( new Uint8Array ( await readArrayBuffer ( file ) ) )
38- for ( const entry of untar ( unpacked ) ) {
39- expanded . push (
40- new File ( [ entry . data . buffer ] , normalizePath ( entry . path ) , {
41- type : guessContentType ( entry . path ) ,
42- } ) ,
43- )
44- }
35+ pushArchiveEntries ( expanded , untar ( unpacked ) )
4536 continue
4637 }
4738 if ( lower . endsWith ( '.gz' ) ) {
4839 const unpacked = gunzipSync ( new Uint8Array ( await readArrayBuffer ( file ) ) )
4940 const name = file . name . replace ( / \. g z $ / i, '' )
50- expanded . push ( new File ( [ unpacked . buffer ] , name , { type : guessContentType ( name ) } ) )
41+ expanded . push ( new File ( [ unpacked ] , name , { type : guessContentType ( name ) } ) )
5142 continue
5243 }
5344 expanded . push ( file )
5445 }
5546 return expanded
5647}
5748
/**
 * Converts extracted archive entries into `File` objects and appends them to `target`.
 *
 * Each entry path is normalized first. Entries are then dropped when the
 * normalized path is empty or ends with `/`, when `isJunkPath` flags it, or
 * when `isTextPath` rejects it. The survivors are passed through
 * `unwrapSingleTopLevelFolder` before being wrapped.
 *
 * @param target - Array that receives the created files (mutated in place).
 * @param entries - Extracted archive entries as path + raw bytes.
 */
function pushArchiveEntries(target: File[], entries: Array<{ path: string; data: Uint8Array }>) {
  const kept: Array<{ path: string; data: Uint8Array }> = []

  for (const entry of entries) {
    const path = normalizePath(entry.path)
    if (!path || path.endsWith('/')) continue
    if (isJunkPath(path)) continue
    if (!isTextPath(path)) continue
    kept.push({ ...entry, path })
  }

  for (const entry of unwrapSingleTopLevelFolder(kept)) {
    // Pass the typed-array view itself (not `.buffer`) so only the bytes the
    // view covers end up in the file.
    target.push(
      new File([entry.data], entry.path, {
        type: guessContentType(entry.path),
      }),
    )
  }
}
66+
/**
 * Reads the full contents of a blob as an `ArrayBuffer`.
 *
 * Tries, in order: the blob's own `arrayBuffer()` method, a `FileReader`
 * when one is defined globally, and finally wrapping the blob in a `Response`.
 *
 * @param file - Blob (or File) to read.
 * @returns The blob's bytes.
 */
async function readArrayBuffer(file: Blob): Promise<ArrayBuffer> {
  if (typeof file.arrayBuffer === 'function') {
    return file.arrayBuffer()
  }
  if (typeof FileReader !== 'undefined') {
    return new Promise<ArrayBuffer>((resolve, reject) => {
      const reader = new FileReader()
      reader.onerror = () => reject(reader.error ?? new Error('Could not read file.'))
      // `result` is typed as string | ArrayBuffer | null; this handler only
      // fires for the readAsArrayBuffer call below, hence the assertion.
      reader.onload = () => resolve(reader.result as ArrayBuffer)
      reader.readAsArrayBuffer(file)
    })
  }
  return new Response(file as BodyInit).arrayBuffer()
}
6481
/**
 * Picks a MIME type for a path based on its file extension.
 *
 * The extension is whatever follows the last `.` in the final path segment,
 * lower-cased. A final segment without a dot has no extension, so a file
 * literally named `json` is not treated as having extension `json`, and a dot
 * inside a directory name is ignored.
 *
 * @param path - File name or `/`-separated path.
 * @returns The `TEXT_TYPES` entry for the extension if there is one,
 *   `'text/plain'` if the extension is in `TEXT_FILE_EXTENSION_SET`,
 *   otherwise `'application/octet-stream'`.
 */
function guessContentType(path: string) {
  const base = path.slice(path.lastIndexOf('/') + 1)
  const dot = base.lastIndexOf('.')
  const ext = dot === -1 ? '' : base.slice(dot + 1).toLowerCase()
  if (!ext) return 'application/octet-stream'
  const known = TEXT_TYPES.get(ext)
  if (known) return known
  if (TEXT_FILE_EXTENSION_SET.has(ext)) return 'text/plain'
  return 'application/octet-stream'
}
7090
/**
 * Cleans up an archive entry path so it is relative and `/`-separated.
 *
 * Steps, in order: remove NUL characters, turn backslashes into forward
 * slashes, trim surrounding whitespace, then strip every leading `./` or `/`
 * prefix (repeatedly, so `././a` and `/./a` both become `a`). A leading `../`
 * or a dotfile name such as `.env` is left untouched.
 *
 * @param path - Raw path as stored in the archive.
 * @returns The normalized path; may be empty.
 */
function normalizePath(path: string) {
  return path
    .replace(/\u0000/g, '')
    .replace(/\\/g, '/')
    .trim()
    .replace(/^(?:\.?\/+)+/, '')
}
7499
75100function untar ( bytes : Uint8Array ) {
@@ -100,3 +125,35 @@ function readOctal(bytes: Uint8Array) {
100125 const raw = readString ( bytes )
101126 return raw ? Number . parseInt ( raw , 8 ) : 0
102127}
128+
/**
 * Removes a shared top-level folder from a set of entry paths.
 *
 * The input is returned unchanged (same array) when it is empty, when any
 * entry has fewer than two non-empty path segments (i.e. sits at the top
 * level), or when the entries do not all share the same first segment.
 * Otherwise a new array is returned in which each entry's path has that first
 * segment removed; all other entry properties are copied as-is.
 *
 * The rewritten path is built from the same empty-segment-filtered parts used
 * for the check, so `pkg//a.md` becomes `a.md` rather than `/a.md`.
 *
 * @param entries - Entries with `/`-separated paths.
 * @returns The original array, or a new array of copies with shortened paths.
 */
function unwrapSingleTopLevelFolder<T extends { path: string }>(entries: T[]): T[] {
  if (entries.length === 0) return entries

  const segments = entries.map((entry) => entry.path.split('/').filter(Boolean))
  if (segments.some((parts) => parts.length < 2)) return entries

  const root = segments[0]?.[0]
  if (!root) return entries
  if (!segments.every((parts) => parts[0] === root)) return entries

  return entries.map((entry, index) => ({
    ...entry,
    path: (segments[index] ?? []).slice(1).join('/'),
  }))
}
144+
/**
 * Reports whether a path is archive clutter that should be skipped.
 *
 * Matching is case-insensitive. A path is junk when it starts with
 * `__MACOSX/`, or when it is `.DS_Store` either on its own or as the last
 * segment of a longer path.
 *
 * @param path - `/`-separated entry path.
 * @returns `true` for junk paths, `false` otherwise.
 */
function isJunkPath(path: string): boolean {
  const lowered = path.toLowerCase()
  return (
    lowered.startsWith('__macosx/') ||
    lowered.endsWith('/.ds_store') ||
    lowered === '.ds_store'
  )
}
152+
/**
 * Reports whether a path's extension is listed in `TEXT_FILE_EXTENSION_SET`.
 *
 * The path is trimmed and lower-cased, and the extension is the text after
 * its last `.`. Paths with no dot, or with nothing after the last dot, are
 * rejected.
 *
 * @param path - Entry path to check.
 * @returns `true` when the extension is in the set, `false` otherwise.
 */
function isTextPath(path: string): boolean {
  const lowered = path.trim().toLowerCase()
  const dot = lowered.lastIndexOf('.')
  if (dot === -1) return false
  const extension = lowered.slice(dot + 1)
  if (!extension) return false
  return TEXT_FILE_EXTENSION_SET.has(extension)
}
0 commit comments