|
| 1 | +package com.datamate.common.domain.utils; |
| 2 | + |
| 3 | +import com.datamate.common.domain.model.FileUploadResult; |
| 4 | +import com.datamate.common.infrastructure.exception.BusinessException; |
| 5 | +import com.datamate.common.infrastructure.exception.SystemErrorCode; |
| 6 | +import lombok.extern.slf4j.Slf4j; |
| 7 | + |
| 8 | +import org.apache.commons.compress.archivers.ArchiveEntry; |
| 9 | +import org.apache.commons.compress.archivers.ArchiveInputStream; |
| 10 | +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; |
| 11 | +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; |
| 12 | +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; |
| 13 | +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; |
| 14 | +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; |
| 15 | + |
| 16 | +import java.io.BufferedInputStream; |
| 17 | +import java.io.BufferedOutputStream; |
| 18 | +import java.io.File; |
| 19 | +import java.io.IOException; |
| 20 | +import java.io.OutputStream; |
| 21 | +import java.nio.charset.StandardCharsets; |
| 22 | +import java.nio.file.Files; |
| 23 | +import java.nio.file.Path; |
| 24 | +import java.nio.file.Paths; |
| 25 | +import java.util.*; |
| 26 | + |
| 27 | +/** |
| 28 | + * Responsible for validating and unpacking archive files. |
| 29 | + * |
| 30 | + * @since 2023-11-17 |
| 31 | + */ |
| 32 | +@Slf4j |
| 33 | +public class ArchiveAnalyzer { |
| 34 | + private static final int DEFAULT_BUFFER_SIZE = 4096; |
| 35 | + |
| 36 | + /** |
| 37 | + * Process list. |
| 38 | + * |
| 39 | + * @param fileDto The uploaded file DTO |
| 40 | + * @return the list |
| 41 | + */ |
| 42 | + public static List<FileUploadResult> process(FileUploadResult fileDto) { |
| 43 | + log.info("Start unpacking [{}]", fileDto.getFileName()); |
| 44 | + File file = fileDto.getSavedFile(); |
| 45 | + Path archivePath; |
| 46 | + try { |
| 47 | + archivePath = Paths.get(file.getCanonicalPath()); |
| 48 | + } catch (IOException e) { |
| 49 | + log.error("Failed to get the archive file path."); |
| 50 | + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, "Failed to get the archive file path."); |
| 51 | + } |
| 52 | + |
| 53 | + List<FileUploadResult> unpacked; |
| 54 | + String extension = AnalyzerUtils.getExtension(fileDto.getFileName()); |
| 55 | + if (AnalyzerUtils.TYPE_ZIP.equalsIgnoreCase(extension)) { |
| 56 | + log.info("ZIP unpacking [{}]", fileDto.getFileName()); |
| 57 | + unpacked = processZip(archivePath); |
| 58 | + log.info("ZIP unpacking FINISHED [{}]", fileDto.getFileName()); |
| 59 | + } else if (AnalyzerUtils.TYPE_TAR_GZ.equalsIgnoreCase(extension)) { |
| 60 | + unpacked = processTarGz(archivePath); |
| 61 | + } else { |
| 62 | + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, "Unrecognized archive format."); |
| 63 | + } |
| 64 | + |
| 65 | + if (!archivePath.toFile().delete()) { |
| 66 | + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, "Unable to delete the archive file " + archivePath.toAbsolutePath()); |
| 67 | + } |
| 68 | + |
| 69 | + log.info("Finished unpacking [{}]", fileDto.getFileName()); |
| 70 | + return unpacked; |
| 71 | + } |
| 72 | + |
| 73 | + private static List<FileUploadResult> processZip(Path archivePath) { |
| 74 | + try (ArchiveInputStream<ZipArchiveEntry> inputStream = new ZipArchiveInputStream( |
| 75 | + new BufferedInputStream(Files.newInputStream(archivePath)))) { |
| 76 | + return unpackArchive(inputStream, archivePath); |
| 77 | + } catch (IOException e) { |
| 78 | + log.error("Failed to unpack zip archive:", e); |
| 79 | + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, "Failed to unpack zip archive."); |
| 80 | + } |
| 81 | + } |
| 82 | + |
| 83 | + private static List<FileUploadResult> processTarGz(Path archivePath) { |
| 84 | + try (ArchiveInputStream<TarArchiveEntry> inputStream = new TarArchiveInputStream( |
| 85 | + new GzipCompressorInputStream(new BufferedInputStream(Files.newInputStream(archivePath))), |
| 86 | + StandardCharsets.UTF_8.toString())) { |
| 87 | + return unpackArchive(inputStream, archivePath); |
| 88 | + } catch (IOException e) { |
| 89 | + log.error("Failed to unpack tar.gz archive:", e); |
| 90 | + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, "Failed to unpack tar.gz archive."); |
| 91 | + } |
| 92 | + } |
| 93 | + |
| 94 | + private static List<FileUploadResult> unpackArchive(ArchiveInputStream<?> archiveInputStream, Path archivePath) throws IOException { |
| 95 | + List<FileUploadResult> unpacked = new ArrayList<>(); |
| 96 | + long unpackedSize = 0L; |
| 97 | + try { |
| 98 | + ArchiveEntry archiveEntry; |
| 99 | + int entryCount = 0; |
| 100 | + while ((archiveEntry = archiveInputStream.getNextEntry()) != null) { |
| 101 | + if (isSymlink(archiveEntry)) { |
| 102 | + // 解压时跳过symlink文件 |
| 103 | + continue; |
| 104 | + } |
| 105 | + entryCount++; |
| 106 | + if (checkUnpackSizeAndFileSize(entryCount, unpacked) || checkVersionSize(unpackedSize, archiveEntry.getSize())) { |
| 107 | + break; |
| 108 | + } |
| 109 | + if (!archiveEntry.isDirectory()) { |
| 110 | + unpackedSize = addFileAndCountFileSize(archiveInputStream, archiveEntry, unpacked, |
| 111 | + unpackedSize, archivePath); |
| 112 | + } |
| 113 | + } |
| 114 | + } catch (IOException e) { |
| 115 | + unpacked.forEach(v -> deleteFile(v.getSavedFile())); |
| 116 | + throw e; |
| 117 | + } |
| 118 | + |
| 119 | + return unpacked; |
| 120 | + } |
| 121 | + |
| 122 | + private static boolean checkVersionSize(long unpackedSize, long currFileSize) { |
| 123 | + return false; |
| 124 | + } |
| 125 | + |
| 126 | + private static long addFileAndCountFileSize(ArchiveInputStream<?> archiveInputStream, ArchiveEntry archiveEntry, |
| 127 | + List<FileUploadResult> unpacked, long unpackedSize, Path archivePath) throws IOException { |
| 128 | + Optional<FileUploadResult> uploadFileDto = extractEntity(archiveInputStream, archiveEntry, archivePath); |
| 129 | + long newSize = unpackedSize; |
| 130 | + if (uploadFileDto.isPresent()) { |
| 131 | + FileUploadResult dto = uploadFileDto.get(); |
| 132 | + unpacked.add(dto); |
| 133 | + newSize += dto.getSavedFile().length(); |
| 134 | + } |
| 135 | + return newSize; |
| 136 | + } |
| 137 | + |
| 138 | + private static boolean checkUnpackSizeAndFileSize(int entryCount, List<FileUploadResult> unpacked) { |
| 139 | + return false; |
| 140 | + } |
| 141 | + |
| 142 | + private static Optional<FileUploadResult> extractEntity(ArchiveInputStream<?> archiveInputStream, ArchiveEntry archiveEntry, Path archivePath) |
| 143 | + throws IOException { |
| 144 | + byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; |
| 145 | + Path path = Paths.get(archivePath.getParent().toString(), archiveEntry.getName()); |
| 146 | + File file = path.toFile(); |
| 147 | + long fileSize = 0L; |
| 148 | + String extension = AnalyzerUtils.getExtension(archiveEntry.getName()); |
| 149 | + |
| 150 | + long supportFileSize = 1024*1024*1024; // 上传大小暂定为1个G |
| 151 | + try (OutputStream outputStream = new BufferedOutputStream(Files.newOutputStream(file.toPath()))) { |
| 152 | + int byteRead; |
| 153 | + while ((byteRead = archiveInputStream.read(buffer)) != -1) { |
| 154 | + outputStream.write(buffer, 0, byteRead); |
| 155 | + fileSize += byteRead; |
| 156 | + if (fileSize > supportFileSize) { |
| 157 | + break; |
| 158 | + } |
| 159 | + } |
| 160 | + } catch (IOException e) { |
| 161 | + log.error("error happened while write entry to file system"); |
| 162 | + file.delete(); |
| 163 | + throw e; |
| 164 | + } |
| 165 | + |
| 166 | + if (fileSize > supportFileSize) { |
| 167 | + // 文件大小超过限制,删除 |
| 168 | + log.info("file {} size exceeds limit", archiveEntry.getName()); |
| 169 | + file.delete(); |
| 170 | + return Optional.empty(); |
| 171 | + } |
| 172 | + return Optional.of(FileUploadResult.builder().savedFile(file).fileName(CommonUtils.trimFilePath(archiveEntry.getName())).build()); |
| 173 | + } |
| 174 | + |
| 175 | + private static void deleteFile(File file) { |
| 176 | + Path fileToDeletePath = Paths.get(file.getPath()); |
| 177 | + if (Files.exists(fileToDeletePath)) { |
| 178 | + try { |
| 179 | + Files.delete(fileToDeletePath); |
| 180 | + } catch (IOException e1) { |
| 181 | + log.error("Failed to delete file.", e1); |
| 182 | + } |
| 183 | + } |
| 184 | + } |
| 185 | + |
| 186 | + private static boolean isSymlink(ArchiveEntry archiveEntry) { |
| 187 | + if (archiveEntry instanceof TarArchiveEntry) { |
| 188 | + return ((TarArchiveEntry) archiveEntry).isSymbolicLink(); |
| 189 | + } |
| 190 | + return false; |
| 191 | + } |
| 192 | +} |
0 commit comments