|
1 | 1 | package com.datamate.datamanagement.application; |
2 | 2 |
|
3 | 3 | import com.baomidou.mybatisplus.core.metadata.IPage; |
| 4 | +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; |
4 | 5 | import com.datamate.common.domain.model.ChunkUploadPreRequest; |
5 | 6 | import com.datamate.common.domain.model.FileUploadResult; |
6 | 7 | import com.datamate.common.domain.service.FileService; |
|
29 | 30 | import com.fasterxml.jackson.databind.ObjectMapper; |
30 | 31 | import jakarta.servlet.http.HttpServletResponse; |
31 | 32 | import lombok.extern.slf4j.Slf4j; |
| 33 | +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; |
| 34 | +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; |
| 35 | +import org.apache.commons.io.IOUtils; |
32 | 36 | import org.springframework.beans.factory.annotation.Autowired; |
33 | 37 | import org.springframework.beans.factory.annotation.Value; |
34 | 38 | import org.springframework.core.io.Resource; |
|
37 | 41 | import org.springframework.stereotype.Service; |
38 | 42 | import org.springframework.transaction.annotation.Transactional; |
39 | 43 |
|
40 | | -import java.io.BufferedInputStream; |
41 | 44 | import java.io.File; |
42 | 45 | import java.io.IOException; |
43 | 46 | import java.io.InputStream; |
44 | 47 | import java.net.MalformedURLException; |
45 | 48 | import java.nio.file.Files; |
46 | 49 | import java.nio.file.Path; |
47 | 50 | import java.nio.file.Paths; |
| 51 | +import java.nio.file.attribute.BasicFileAttributes; |
48 | 52 | import java.time.LocalDateTime; |
| 53 | +import java.time.ZoneId; |
49 | 54 | import java.time.format.DateTimeFormatter; |
50 | 55 | import java.util.*; |
51 | 56 | import java.util.concurrent.CompletableFuture; |
52 | 57 | import java.util.function.Function; |
53 | 58 | import java.util.stream.Collectors; |
54 | | -import java.util.zip.ZipEntry; |
55 | | -import java.util.zip.ZipOutputStream; |
| 59 | +import java.util.stream.Stream; |
56 | 60 |
|
57 | 61 | /** |
58 | 62 | * 数据集文件应用服务 |
@@ -85,11 +89,77 @@ public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository |
85 | 89 | */ |
86 | 90 | @Transactional(readOnly = true) |
87 | 91 | public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) { |
88 | | - IPage<DatasetFile> page = new com.baomidou.mybatisplus.extension.plugins.pagination.Page<>(pagingQuery.getPage(), pagingQuery.getSize()); |
| 92 | + IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); |
89 | 93 | IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page); |
90 | 94 | return PagedResponse.of(files); |
91 | 95 | } |
92 | 96 |
|
| 97 | + /** |
| 98 | + * 获取数据集文件列表 |
| 99 | + */ |
| 100 | + @Transactional(readOnly = true) |
| 101 | + public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) { |
| 102 | + Dataset dataset = datasetRepository.getById(datasetId); |
| 103 | + int page = Math.max(pagingQuery.getPage(), 1); |
| 104 | + int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 20 : pagingQuery.getSize(); |
| 105 | + if (dataset == null) { |
| 106 | + return PagedResponse.of(new Page<>(page, size)); |
| 107 | + } |
| 108 | + String datasetPath = dataset.getPath(); |
| 109 | + Path queryPath = Path.of(dataset.getPath() + File.separator + prefix); |
| 110 | + Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId) |
| 111 | + .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); |
| 112 | + try (Stream<Path> pathStream = Files.list(queryPath)) { |
| 113 | + List<Path> allFiles = pathStream |
| 114 | + .filter(path -> path.toString().startsWith(datasetPath)) |
| 115 | + .sorted(Comparator |
| 116 | + .comparing((Path path) -> !Files.isDirectory(path)) |
| 117 | + .thenComparing(path -> path.getFileName().toString())) |
| 118 | + .collect(Collectors.toList()); |
| 119 | + |
| 120 | + // 计算分页 |
| 121 | + int total = allFiles.size(); |
| 122 | + int totalPages = (int) Math.ceil((double) total / size); |
| 123 | + |
| 124 | + // 获取当前页数据 |
| 125 | + int fromIndex = (page - 1) * size; |
| 126 | + fromIndex = Math.max(fromIndex, 0); |
| 127 | + int toIndex = Math.min(fromIndex + size, total); |
| 128 | + |
| 129 | + List<Path> pageData = new ArrayList<>(); |
| 130 | + if (fromIndex < total) { |
| 131 | + pageData = allFiles.subList(fromIndex, toIndex); |
| 132 | + } |
| 133 | + List<DatasetFile> datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList(); |
| 134 | + |
| 135 | + return new PagedResponse<>(page, size, total, totalPages, datasetFiles); |
| 136 | + } catch (IOException e) { |
| 137 | + log.error("list dataset path error", e); |
| 138 | + return PagedResponse.of(new Page<>(page, size)); |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) { |
| 143 | + DatasetFile datasetFile = new DatasetFile(); |
| 144 | + LocalDateTime localDateTime = LocalDateTime.now(); |
| 145 | + try { |
| 146 | + localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); |
| 147 | + } catch (IOException e) { |
| 148 | + log.error("get last modified time error", e); |
| 149 | + } |
| 150 | + datasetFile.setFileName(path.getFileName().toString()); |
| 151 | + datasetFile.setUploadTime(localDateTime); |
| 152 | + if (Files.isDirectory(path)) { |
| 153 | + datasetFile.setId("directory-" + datasetFile.getFileName()); |
| 154 | + } else if (Objects.isNull(datasetFilesMap.get(path.toString()))) { |
| 155 | + datasetFile.setId("file-" + datasetFile.getFileName()); |
| 156 | + datasetFile.setFileSize(path.toFile().length()); |
| 157 | + } else { |
| 158 | + datasetFile = datasetFilesMap.get(path.toString()); |
| 159 | + } |
| 160 | + return datasetFile; |
| 161 | + } |
| 162 | + |
93 | 163 | /** |
94 | 164 | * 获取文件详情 |
95 | 165 | */ |
@@ -151,58 +221,61 @@ public Resource downloadFile(String datasetId, String fileId) { |
151 | 221 | */ |
152 | 222 | @Transactional(readOnly = true) |
153 | 223 | public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) { |
| 224 | + Dataset dataset = datasetRepository.getById(datasetId); |
| 225 | + if (Objects.isNull(dataset)) { |
| 226 | + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); |
| 227 | + } |
154 | 228 | List<DatasetFile> allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId); |
155 | | - fileRename(allByDatasetId); |
| 229 | + Set<String> filePaths = allByDatasetId.stream().map(DatasetFile::getFilePath).collect(Collectors.toSet()); |
| 230 | + String datasetPath = dataset.getPath(); |
| 231 | + Path downloadPath = Path.of(datasetPath); |
156 | 232 | response.setContentType("application/zip"); |
157 | 233 | String zipName = String.format("dataset_%s.zip", |
158 | 234 | LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"))); |
159 | 235 | response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + zipName); |
160 | | - try (ZipOutputStream zos = new ZipOutputStream(response.getOutputStream())) { |
161 | | - for (DatasetFile file : allByDatasetId) { |
162 | | - addToZipFile(file, zos); |
| 236 | + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(response.getOutputStream())) { |
| 237 | + try (Stream<Path> pathStream = Files.walk(downloadPath)) { |
| 238 | + List<Path> allPaths = pathStream.filter(path -> path.toString().startsWith(datasetPath)) |
| 239 | + .filter(path -> filePaths.stream().anyMatch(filePath -> filePath.startsWith(path.toString()))) |
| 240 | + .toList(); |
| 241 | + for (Path path : allPaths) { |
| 242 | + addToZipFile(path, downloadPath, zos); |
| 243 | + } |
163 | 244 | } |
164 | 245 | } catch (IOException e) { |
165 | 246 | log.error("Failed to download files in batches.", e); |
166 | 247 | throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); |
167 | 248 | } |
168 | 249 | } |
169 | 250 |
|
170 | | - private void fileRename(List<DatasetFile> files) { |
171 | | - Set<String> uniqueFilenames = new HashSet<>(); |
172 | | - for (DatasetFile file : files) { |
173 | | - String originalFilename = file.getFileName(); |
174 | | - if (!uniqueFilenames.add(originalFilename)) { |
175 | | - String newFilename; |
176 | | - int counter = 1; |
177 | | - do { |
178 | | - newFilename = generateNewFilename(originalFilename, counter); |
179 | | - counter++; |
180 | | - } while (!uniqueFilenames.add(newFilename)); |
181 | | - file.setFileName(newFilename); |
| 251 | + private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException { |
| 252 | + String entryName = basePath.relativize(path) |
| 253 | + .toString() |
| 254 | + .replace(File.separator, "/"); |
| 255 | + |
| 256 | + // 处理目录 |
| 257 | + if (Files.isDirectory(path)) { |
| 258 | + if (!entryName.isEmpty()) { |
| 259 | + entryName += "/"; |
| 260 | + ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); |
| 261 | + zos.putArchiveEntry(dirEntry); |
| 262 | + zos.closeArchiveEntry(); |
182 | 263 | } |
183 | | - } |
184 | | - } |
| 264 | + } else { |
| 265 | + // 处理文件 |
| 266 | + ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName); |
185 | 267 |
|
186 | | - private String generateNewFilename(String oldFilename, int counter) { |
187 | | - int dotIndex = oldFilename.lastIndexOf("."); |
188 | | - return oldFilename.substring(0, dotIndex) + "-(" + counter + ")" + oldFilename.substring(dotIndex); |
189 | | - } |
| 268 | + // 设置更多属性 |
| 269 | + BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); |
| 270 | + fileEntry.setSize(attrs.size()); |
| 271 | + fileEntry.setLastModifiedTime(attrs.lastModifiedTime()); |
190 | 272 |
|
191 | | - private void addToZipFile(DatasetFile file, ZipOutputStream zos) throws IOException { |
192 | | - if (file.getFilePath() == null || !Files.exists(Paths.get(file.getFilePath()))) { |
193 | | - log.warn("The file hasn't been found on filesystem, id: {}", file.getId()); |
194 | | - return; |
195 | | - } |
196 | | - try (InputStream fis = Files.newInputStream(Paths.get(file.getFilePath())); |
197 | | - BufferedInputStream bis = new BufferedInputStream(fis)) { |
198 | | - ZipEntry zipEntry = new ZipEntry(file.getFileName()); |
199 | | - zos.putNextEntry(zipEntry); |
200 | | - byte[] buffer = new byte[8192]; |
201 | | - int length; |
202 | | - while ((length = bis.read(buffer)) >= 0) { |
203 | | - zos.write(buffer, 0, length); |
| 273 | + zos.putArchiveEntry(fileEntry); |
| 274 | + |
| 275 | + try (InputStream is = Files.newInputStream(path)) { |
| 276 | + IOUtils.copy(is, zos); |
204 | 277 | } |
205 | | - zos.closeEntry(); |
| 278 | + zos.closeArchiveEntry(); |
206 | 279 | } |
207 | 280 | } |
208 | 281 |
|
|
0 commit comments