diff --git a/unpack.c b/unpack.c index 8756e91..bf15025 100644 --- a/unpack.c +++ b/unpack.c @@ -5,6 +5,7 @@ #include #include #include +#include typedef struct { char* filename; @@ -19,6 +20,13 @@ typedef struct { char error_message[256]; } ExtractedArchive; + +double get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec) + tv.tv_usec / 1000000.0; +} + ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) { if (!result || !archive) { @@ -34,7 +42,7 @@ ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_mess } EMSCRIPTEN_KEEPALIVE -ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { +ExtractedArchive* extract_archive(uint8_t* input_data, size_t input_size ) { struct archive* archive; struct archive_entry* entry; size_t files_struct_length = 100; @@ -56,7 +64,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { archive_read_support_filter_all(archive); archive_read_support_format_all(archive); - if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) { + if (archive_read_open_memory(archive, input_data, input_size) != ARCHIVE_OK) { return error_handler(result,archive_error_string(archive), archive); } files = malloc(sizeof(FileData) * files_struct_length); @@ -137,32 +145,51 @@ char* write_to_temp_file(uint8_t* data, size_t size) { free(temp_file_name); return NULL; } + if (fclose(temp_file) != 0) { + perror("Failed to close temporary file"); + unlink(temp_file_name); + free(temp_file_name); + return NULL; + } - fclose(temp_file); return temp_file_name; } EMSCRIPTEN_KEEPALIVE -ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { +ExtractedArchive* decompression(uint8_t* input_data, size_t input_size) { struct archive* archive; struct archive_entry* entry; size_t files_count = 0; - - const size_t buffsize = 64 * 1024; - char buff[buffsize]; - size_t total_size = 0; + size_t total_size = 0; const char *error_message; + size_t files_struct_length = 1; + size_t compression_ratio = 10; + size_t estimated_decompressed_size = input_size * compression_ratio; - FileData* files = malloc(sizeof(FileData) * (files_count + 1)); + /* 64KB, archive_read_data can do realloc too during reading data, + so data chunks size should be defined carefully. There is memory leaks with 4MB data chunck size + */ + const size_t buffsize = 65536; + char* buff = (char*)malloc(buffsize); + double start_time, end_time; + + if (!buff) { + printf("Failed to allocate memory for decompression buffer\n"); + return NULL; + } + + FileData* files = malloc(sizeof(FileData) * files_struct_length); if (!files) { printf("Failed to allocate memory for files array\n"); + free(buff); return NULL; } - + ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive)); if (!result) { free(files); + free(buff); return NULL; } @@ -171,9 +198,10 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { result->status = 1; result->error_message[0] = '\0'; - char* temp_file_name = write_to_temp_file(inputData, inputSize); + char* temp_file_name = write_to_temp_file(input_data, input_size); if (!temp_file_name) { free(files); + free(buff); error_message = "Failed to create temporary file"; return error_handler(result, error_message, archive); } @@ -182,69 +210,126 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { archive_read_support_filter_all(archive); archive_read_support_format_raw(archive); - if (archive_read_open_filename(archive, temp_file_name, inputSize) != ARCHIVE_OK) { + /* Putting buffer size allows libarchive to read a file by data chunks. + This reduces memory leaks on libarchive side + */ + + if (archive_read_open_filename(archive, temp_file_name, buffsize) != ARCHIVE_OK) { unlink(temp_file_name); free(temp_file_name); free(files); + free(buff); return error_handler(result, archive_error_string(archive), archive); } - + while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { + if (files_count + 1 > files_struct_length) { + files_struct_length *= 2; // double the length + FileData* oldfiles = files; + files= realloc(files, sizeof(FileData) * files_struct_length); + if (!files) { + unlink(temp_file_name); + free(temp_file_name); + result->fileCount = files_count; + result->files = oldfiles; + error_message = "Memory allocation error for file data."; + return error_handler(result, error_message, archive); + } + } + const char* filename = archive_entry_pathname(entry); - if (!filename) filename = "decompression"; + if (!filename) filename = "data"; files[files_count].filename = strdup(filename); - files[files_count].data = NULL; - files[files_count].data_size = 0; + files[files_count].data = malloc(estimated_decompressed_size); + + if (!files[files_count].data) { + free(files[files_count].filename); + unlink(temp_file_name); + free(temp_file_name); + free(buff); + files[files_count].filename = NULL; + result->fileCount = files_count; + result->files = files; + + error_message = "Memory allocation error for file contents."; + return error_handler(result, error_message, archive); + } + files[files_count].data_size = buffsize; ssize_t ret; + total_size = 0; - for (;;) { + while (1) { ret = archive_read_data(archive, buff, buffsize); + if (ret < 0) { for (size_t i = 0; i <= files_count; i++) { - free(files[i].filename); - free(files[i].data); - } - free(files); - result->files = NULL; - return error_handler(result, archive_error_string(archive), archive); + free(files[i].filename); + free(files[i].data); + } + free(files); + free(buff); + unlink(temp_file_name); + free(temp_file_name); + result->files = NULL; + result = error_handler(result, archive_error_string(archive), archive); + break; } if (ret == 0) { break; } - void* new_data = realloc(files[files_count].data, total_size + ret); - if (!new_data) { - free(files[files_count].data); - error_message = "Memory allocation error"; - return error_handler(result, error_message, archive); + size_t sum = total_size + ret; + if (sum > estimated_decompressed_size) { + size_t new_size = estimated_decompressed_size * 1.5; + void* new_data = realloc(files[files_count].data, new_size);//? + if (!new_data) { + for (size_t i = 0; i <= files_count; i++) { + free(files[i].filename); + free(files[i].data); + } + + result->files = NULL; + result->fileCount = 0; + free(files); + free(buff); + unlink(temp_file_name); + free(temp_file_name); + error_message = "Memory allocation error"; + result = error_handler(result, error_message, archive); + break; + } + + files[files_count].data = new_data; + estimated_decompressed_size = new_size; + } - files[files_count].data = new_data; memcpy(files[files_count].data + total_size, buff, ret); total_size += ret; } + files[files_count].data_size = total_size; files_count++; + free(buff); } - archive_read_free(archive); unlink(temp_file_name); free(temp_file_name); - result->files = files; result->fileCount = files_count; - result->status = 1; + result->status = 1; return result; } + EMSCRIPTEN_KEEPALIVE -ExtractedArchive* extract(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) { - if (!decompressionOnly) { - return extract_archive(inputData, inputSize); +ExtractedArchive* extract(uint8_t* input_data, size_t input_size, bool decompression_only ) { + if (!decompression_only) { + return extract_archive(input_data, input_size); } else { - return decompression(inputData, inputSize); + return decompression(input_data, input_size); } }