1+ #include <libgen.h>
12#include <stdio.h>
23#include <string.h>
34#include <stdlib.h>
@@ -19,6 +20,11 @@ typedef struct {
1920 char error_message [256 ];
2021} ExtractedArchive ;
2122
/*
 * One symbolic-link entry collected from the archive.
 *
 * Both strings are heap copies made by extract_archive() with strdup() and
 * are released with free() once all links have been resolved.
 */
typedef struct {
    char *linkname; /* archive pathname of the symlink entry itself */
    char *target;   /* link target joined onto the directory of linkname */
} SymlinkInfo;
27+
2228ExtractedArchive * error_handler (ExtractedArchive * result , const char * error_message , struct archive * archive ) {
2329
2430 if (!result || !archive ) {
@@ -33,14 +39,50 @@ ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_mess
3339 return result ;
3440}
3541
/*
 * Build "<dir>/<relative>" in a freshly allocated buffer.
 *
 * dir      - directory prefix; when NULL or empty the result is just a copy
 *            of relative.
 * relative - path to append; must be a NUL-terminated string.
 *
 * Returns a malloc()ed string the caller must free(), or NULL when relative
 * is NULL or the allocation fails. No normalisation is performed: "." and
 * ".." components are kept verbatim.
 */
static char *join_paths(const char *dir, const char *relative) {
    if (!relative) {
        return NULL;
    }

    const size_t rel_len = strlen(relative);
    const size_t dir_len = (dir && *dir) ? strlen(dir) : 0;

    /* With a prefix we need room for the '/' separator as well. */
    const size_t total = (dir_len ? dir_len + 1 : 0) + rel_len + 1;

    char *buf = malloc(total);
    if (!buf) {
        return NULL;
    }

    char *out = buf;
    if (dir_len) {
        memcpy(out, dir, dir_len);
        out += dir_len;
        *out++ = '/';
    }
    /* rel_len + 1 copies the terminating NUL too. */
    memcpy(out, relative, rel_len + 1);

    return buf;
}
49+
50+ static const FileData * resolve_target_recursive (const FileData * files , size_t file_count ,
51+ const SymlinkInfo * symlinks , size_t symlink_count ,
52+ const char * target , int depth )
53+ {
54+ if (!target || depth > 32 ) // prevent infinite recursion
55+ return NULL ;
56+
57+ // First, check if target is a regular file
58+ for (size_t i = 0 ; i < file_count ; i ++ ) {
59+ if (strcmp (files [i ].filename , target ) == 0 ) {
60+ if (files [i ].data && files [i ].data_size > 0 ) {
61+ return & files [i ]; // Found real file
62+ }
63+ }
64+ }
65+
66+ // If not found among files, maybe it's another symlink
67+ for (size_t i = 0 ; i < symlink_count ; i ++ ) {
68+ if (strcmp (symlinks [i ].linkname , target ) == 0 ) {
69+ // Recurse into that symlink's target
70+ return resolve_target_recursive (files , file_count , symlinks , symlink_count ,
71+ symlinks [i ].target , depth + 1 );
72+ }
73+ }
74+
75+ return NULL ; // Not found
76+ }
77+
3678EMSCRIPTEN_KEEPALIVE
3779ExtractedArchive * extract_archive (uint8_t * inputData , size_t inputSize ) {
3880 struct archive * archive ;
3981 struct archive_entry * entry ;
4082 size_t files_struct_length = 100 ;
4183 FileData * files = NULL ;
4284 size_t files_count = 0 ;
43- const char * error_message ;
85+ const char * error_message ;
4486 bool hasSymLinks = false;
4587
4688 ExtractedArchive * result = (ExtractedArchive * )malloc (sizeof (ExtractedArchive ));
@@ -75,7 +117,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
75117 if (files_count + 1 > files_struct_length ) {
76118 files_struct_length *= 2 ; // double the length
77119 FileData * oldfiles = files ;
78- files = realloc (files , sizeof (FileData ) * files_struct_length );
120+ files = realloc (files , sizeof (FileData ) * files_struct_length );
79121 if (!files ) {
80122 result -> fileCount = files_count ;
81123 result -> files = oldfiles ; // otherwise memory is lost, alternatively also everything can be freed.
@@ -116,7 +158,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
116158
117159 // Resolve symlinks
118160 if (hasSymLinks ) {
119- // Rewind and reopen the archive to iterate over symlinks
161+ // Reopen the archive to iterate over symlinks
120162 archive_read_free (archive );
121163 archive = archive_read_new ();
122164 archive_read_support_filter_all (archive );
@@ -127,35 +169,54 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
127169 }
128170
129171 struct archive_entry * symlink_entry ;
172+
173+ size_t symlink_count = 0 ;
174+ size_t symlink_alloc = 16 ;
175+ SymlinkInfo * symlinks = malloc (sizeof (SymlinkInfo ) * symlink_alloc );
176+
177+ // Collect all symlink entries
130178 while (archive_read_next_header (archive , & symlink_entry ) == ARCHIVE_OK ) {
131- // Process only symlinks this time
132- if (archive_entry_filetype (symlink_entry ) != AE_IFLNK ) {
179+ if (archive_entry_filetype (symlink_entry ) != AE_IFLNK )
133180 continue ;
134- }
135181
136- const char * linkname = archive_entry_pathname (symlink_entry );
137- const char * target = archive_entry_symlink (symlink_entry );
182+ const char * tgt = archive_entry_symlink (symlink_entry );
138183
139- // Target not found
140- if (!target ) {
184+ if (!tgt ) {
141185 continue ;
142186 }
143187
144- // Find the target file in the already populated files[]
145- size_t target_index = (size_t )-1 ;
146- for (size_t i = 0 ; i < files_count ; i ++ ) {
147- if (strcmp (files [i ].filename , target ) == 0 ) {
148- target_index = i ;
149- break ;
150- }
188+ if (symlink_count + 1 > symlink_alloc ) {
189+ symlink_alloc *= 2 ;
190+ symlinks = realloc (symlinks , sizeof (SymlinkInfo ) * symlink_alloc );
151191 }
152192
153- // Target not found in the processed files
154- if (target_index == (size_t )-1 || !files [target_index ].data ) {
155- continue ;
193+ // Compute directory of the symlink
194+ char * link_dir = strdup (archive_entry_pathname (symlink_entry ));
195+ char * dir = dirname (link_dir );
196+ char * resolved_target_path = join_paths (dir , tgt );
197+ free (dir );
198+ free (link_dir );
199+
200+ symlinks [symlink_count ].linkname = strdup (archive_entry_pathname (symlink_entry ));
201+ symlinks [symlink_count ].target = strdup (resolved_target_path );
202+ symlink_count ++ ;
203+ }
204+
205+ // Resolve and populate symlinks
206+ for (size_t i = 0 ; i < symlink_count ; i ++ ) {
207+ const char * linkname = symlinks [i ].linkname ;
208+ const char * target = symlinks [i ].target ;
209+
210+ const FileData * resolved = resolve_target_recursive (files , files_count ,
211+ symlinks , symlink_count ,
212+ target , 0 );
213+
214+ if (!resolved ) {
215+ // error_message = "Failed to resolve symlink.";
216+ error_message = target ;
217+ return error_handler (result , error_message , archive );
156218 }
157219
158- // Add the symlink entry
159220 if (files_count + 1 > files_struct_length ) {
160221 files_struct_length *= 2 ;
161222 FileData * oldfiles = files ;
@@ -169,20 +230,19 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
169230 }
170231
171232 files [files_count ].filename = strdup (linkname );
172- files [files_count ].data_size = files [target_index ].data_size ;
173- files [files_count ].data = malloc (files [target_index ].data_size );
174- if (!files [files_count ].data ) {
175- free (files [files_count ].filename );
176- files [files_count ].filename = NULL ;
177- result -> fileCount = files_count ;
178- result -> files = files ;
179- error_message = "Memory allocation error for symlink target data." ;
180- return error_handler (result , error_message , archive );
181- }
182- memcpy (files [files_count ].data , files [target_index ].data , files [target_index ].data_size );
233+
234+ files [files_count ].data_size = resolved -> data_size ;
235+ files [files_count ].data = malloc (resolved -> data_size );
236+ memcpy (files [files_count ].data , resolved -> data , resolved -> data_size );
183237
184238 files_count ++ ;
185239 }
240+
241+ for (size_t i = 0 ; i < symlink_count ; i ++ ) {
242+ free (symlinks [i ].linkname );
243+ free (symlinks [i ].target );
244+ }
245+ free (symlinks );
186246 }
187247
188248 archive_read_free (archive );
0 commit comments