|
| 1 | +/*--------------------------------------------------------------------------------------------- |
| 2 | + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. |
| 3 | + * This file is a part of the ModelEngine Project. |
| 4 | + * Licensed under the MIT License. See License.txt in the project root for license information. |
| 5 | + *--------------------------------------------------------------------------------------------*/ |
| 6 | + |
| 7 | +package modelengine.fit.jade.aipp.file.extract; |
| 8 | + |
| 9 | +import cn.idev.excel.ExcelReader; |
| 10 | +import cn.idev.excel.FastExcel; |
| 11 | +import cn.idev.excel.context.AnalysisContext; |
| 12 | +import cn.idev.excel.converters.Converter; |
| 13 | +import cn.idev.excel.enums.CellDataTypeEnum; |
| 14 | +import cn.idev.excel.metadata.GlobalConfiguration; |
| 15 | +import cn.idev.excel.metadata.data.DataFormatData; |
| 16 | +import cn.idev.excel.metadata.data.ReadCellData; |
| 17 | +import cn.idev.excel.metadata.property.ExcelContentProperty; |
| 18 | +import cn.idev.excel.read.listener.ReadListener; |
| 19 | +import cn.idev.excel.read.metadata.ReadSheet; |
| 20 | +import cn.idev.excel.util.DateUtils; |
| 21 | +import cn.idev.excel.util.StringUtils; |
| 22 | +import lombok.NonNull; |
| 23 | +import modelengine.fit.jober.aipp.service.OperatorService; |
| 24 | +import modelengine.fitframework.annotation.Component; |
| 25 | +import modelengine.fitframework.annotation.Fitable; |
| 26 | + |
| 27 | +import java.io.BufferedInputStream; |
| 28 | +import java.io.File; |
| 29 | +import java.io.IOException; |
| 30 | +import java.io.InputStream; |
| 31 | +import java.math.BigDecimal; |
| 32 | +import java.nio.file.Files; |
| 33 | +import java.nio.file.InvalidPathException; |
| 34 | +import java.nio.file.Path; |
| 35 | +import java.nio.file.Paths; |
| 36 | +import java.text.SimpleDateFormat; |
| 37 | +import java.util.Arrays; |
| 38 | +import java.util.Date; |
| 39 | +import java.util.List; |
| 40 | +import java.util.Map; |
| 41 | +import java.util.stream.Collectors; |
| 42 | + |
| 43 | +/** |
| 44 | + * Excel文件的提取器。 |
| 45 | + * |
| 46 | + * @author 黄政炫 |
| 47 | + * @since 2025-09-06 |
| 48 | + */ |
| 49 | +@Component |
| 50 | +public class ExcelFileExtractor implements FileExtractor { |
| 51 | + /** |
| 52 | + * 把单元格转换成格式化字符串。 |
| 53 | + * |
| 54 | + * @param cell 表示单元格数据 {@link ReadCellData}。 |
| 55 | + * @return 转换后的内容 {@link String}。 |
| 56 | + */ |
| 57 | + private static String getCellValueAsString(@NonNull ReadCellData<?> cell) { |
| 58 | + switch (cell.getType()) { |
| 59 | + case STRING: |
| 60 | + return cell.getStringValue(); |
| 61 | + case NUMBER: |
| 62 | + DataFormatData fmt = cell.getDataFormatData(); |
| 63 | + if (DateUtils.isADateFormat(fmt.getIndex(), fmt.getFormat())) { |
| 64 | + double value = cell.getNumberValue().doubleValue(); |
| 65 | + Date date = DateUtils.getJavaDate(value, true); |
| 66 | + return new SimpleDateFormat("yyyy-MM-dd").format(date); |
| 67 | + } else { |
| 68 | + BigDecimal num = cell.getNumberValue(); |
| 69 | + return num.stripTrailingZeros().toPlainString(); |
| 70 | + } |
| 71 | + case BOOLEAN: |
| 72 | + return Boolean.toString(cell.getBooleanValue()); |
| 73 | + default: |
| 74 | + return ""; |
| 75 | + } |
| 76 | + } |
| 77 | + |
| 78 | + /** |
| 79 | + * 该文件提取器支持EXCEL和CSV类型。 |
| 80 | + * |
| 81 | + * @return 支持的枚举常量类型列表 {@link List}{@code <}{@link String}{@code >}。 |
| 82 | + */ |
| 83 | + @Override |
| 84 | + @Fitable(id = "get-fileType-excel") |
| 85 | + public List<String> supportedFileTypes() { |
| 86 | + return Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString()); |
| 87 | + } |
| 88 | + |
| 89 | + /** |
| 90 | + * 判断文件路径是否有效 |
| 91 | + * |
| 92 | + * @param fileUrl 表示文件路径 {@link String}。 |
| 93 | + * @return 表示路径是否有效 {@code boolean}。 |
| 94 | + */ |
| 95 | + private boolean isValidPath(String fileUrl) { |
| 96 | + try { |
| 97 | + Path path = Paths.get(fileUrl); |
| 98 | + return Files.exists(path) && Files.isRegularFile(path); |
| 99 | + } catch (InvalidPathException e) { |
| 100 | + return false; |
| 101 | + } |
| 102 | + } |
| 103 | + |
| 104 | + /** |
| 105 | + * 从指定路径的 Excel 文件中提取内容,并返回为字符串形式。 |
| 106 | + * |
| 107 | + * @param fileUrl 表示文件路径的 {@link String}。 |
| 108 | + * @return 表示文件内容的 {@link String}。 |
| 109 | + */ |
| 110 | + @Override |
| 111 | + @Fitable(id = "extract-file-excel") |
| 112 | + public String extractFile(String fileUrl) { |
| 113 | + if (!isValidPath(fileUrl)) { |
| 114 | + throw new IllegalArgumentException(String.format("Invalid FilePath. [fileUrl=%s]", fileUrl)); |
| 115 | + } |
| 116 | + File file = Paths.get(fileUrl).toFile(); |
| 117 | + StringBuilder excelContent = new StringBuilder(); |
| 118 | + ExcelReadListener listener = new ExcelReadListener(excelContent); |
| 119 | + ExcelReader reader = null; |
| 120 | + try (InputStream is = new BufferedInputStream(Files.newInputStream(file.toPath()))) { |
| 121 | + reader = FastExcel.read(is, listener) |
| 122 | + .registerConverter(new CustomCellStringConverter()) |
| 123 | + .headRowNumber(0) |
| 124 | + .build(); |
| 125 | + |
| 126 | + List<ReadSheet> sheets = reader.excelExecutor().sheetList(); |
| 127 | + for (ReadSheet meta : sheets) { |
| 128 | + excelContent.append("Sheet ").append(meta.getSheetNo() + 1).append(':').append('\n'); |
| 129 | + ReadSheet readSheet = FastExcel.readSheet(meta.getSheetNo()).headRowNumber(0).build(); |
| 130 | + reader.read(readSheet); |
| 131 | + } |
| 132 | + excelContent.append('\n'); |
| 133 | + } catch (IOException e) { |
| 134 | + throw new IllegalStateException(String.format("Fail to extract excel file. [exception=%s]", e.getMessage()), |
| 135 | + e); |
| 136 | + } finally { |
| 137 | + if (reader != null) { |
| 138 | + reader.finish(); // 关闭资源 |
| 139 | + } |
| 140 | + } |
| 141 | + return excelContent.toString(); |
| 142 | + } |
| 143 | + |
| 144 | + /** |
| 145 | + * 读取监听器的内部类实现。 |
| 146 | + */ |
| 147 | + private class ExcelReadListener implements ReadListener<Map<Integer, String>> { |
| 148 | + private final StringBuilder excelContent; |
| 149 | + |
| 150 | + ExcelReadListener(StringBuilder excelContent) { |
| 151 | + this.excelContent = excelContent; |
| 152 | + } |
| 153 | + |
| 154 | + @Override |
| 155 | + public void invoke(Map<Integer, String> data, AnalysisContext context) { |
| 156 | + String line = data.entrySet() |
| 157 | + .stream() |
| 158 | + .sorted(Map.Entry.comparingByKey()) |
| 159 | + .map(e -> e.getValue() == null ? "" : e.getValue()) |
| 160 | + .collect(Collectors.joining("\t")); |
| 161 | + this.excelContent.append(line).append('\n'); |
| 162 | + } |
| 163 | + |
| 164 | + @Override |
| 165 | + public void doAfterAllAnalysed(AnalysisContext context) {} |
| 166 | + } |
| 167 | + |
| 168 | + /** |
| 169 | + * 自定义单元格数据转换器。 |
| 170 | + * 该转换器实现了能够处理单元格数据并将其转换为字符串形式。 |
| 171 | + */ |
| 172 | + public static class CustomCellStringConverter implements Converter<String> { |
| 173 | + @Override |
| 174 | + public Class<String> supportJavaTypeKey() { |
| 175 | + return String.class; |
| 176 | + } |
| 177 | + |
| 178 | + @Override |
| 179 | + public CellDataTypeEnum supportExcelTypeKey() { |
| 180 | + return null; |
| 181 | + } |
| 182 | + |
| 183 | + @Override |
| 184 | + public String convertToJavaData(ReadCellData<?> cellData, ExcelContentProperty contentProperty, |
| 185 | + GlobalConfiguration globalConfiguration) { |
| 186 | + return (cellData != null) ? getCellValueAsString(cellData) : StringUtils.EMPTY; |
| 187 | + } |
| 188 | + } |
| 189 | +} |
0 commit comments