Skip to content

Commit 381b95c

Browse files
committed
把提取分析excel文件的功能做了插件化处理
1 parent 6141ae9 commit 381b95c

File tree

15 files changed

+492
-70
lines changed

15 files changed

+492
-70
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
<parent>
7+
<groupId>modelengine.fit.jade</groupId>
8+
<artifactId>app-builder-plugin-parent</artifactId>
9+
<version>1.0.0-SNAPSHOT</version>
10+
</parent>
11+
12+
<artifactId>aipp-file-extract-excel</artifactId>
13+
14+
<dependencies>
15+
<!-- FIT -->
16+
<dependency>
17+
<groupId>org.fitframework</groupId>
18+
<artifactId>fit-api</artifactId>
19+
</dependency>
20+
<dependency>
21+
<groupId>org.fitframework</groupId>
22+
<artifactId>fit-util</artifactId>
23+
</dependency>
24+
<dependency>
25+
<groupId>cn.idev.excel</groupId>
26+
<artifactId>fastexcel</artifactId>
27+
</dependency>
28+
<dependency>
29+
<groupId>modelengine.fit.jade</groupId>
30+
<artifactId>aipp-file-extract-service</artifactId>
31+
</dependency>
32+
<dependency>
33+
<groupId>org.junit.jupiter</groupId>
34+
<artifactId>junit-jupiter</artifactId>
35+
</dependency>
36+
<dependency>
37+
<groupId>org.fitframework</groupId>
38+
<artifactId>fit-test-framework</artifactId>
39+
</dependency>
40+
<dependency>
41+
<groupId>org.assertj</groupId>
42+
<artifactId>assertj-core</artifactId>
43+
</dependency>
44+
</dependencies>
45+
46+
<build>
47+
<plugins>
48+
<plugin>
49+
<groupId>org.fitframework</groupId>
50+
<artifactId>fit-build-maven-plugin</artifactId>
51+
<version>${fit.version}</version>
52+
<executions>
53+
<execution>
54+
<id>build-plugin</id>
55+
<goals>
56+
<goal>build-plugin</goal>
57+
</goals>
58+
</execution>
59+
<execution>
60+
<id>package-plugin</id>
61+
<goals>
62+
<goal>package-plugin</goal>
63+
</goals>
64+
</execution>
65+
</executions>
66+
</plugin>
67+
<plugin>
68+
<groupId>org.apache.maven.plugins</groupId>
69+
<artifactId>maven-antrun-plugin</artifactId>
70+
<version>${maven.antrun.version}</version>
71+
<executions>
72+
<execution>
73+
<phase>install</phase>
74+
<configuration>
75+
<target>
76+
<copy file="${project.build.directory}/${project.build.finalName}.jar"
77+
todir="../../../build/plugins"/>
78+
</target>
79+
</configuration>
80+
<goals>
81+
<goal>run</goal>
82+
</goals>
83+
</execution>
84+
</executions>
85+
</plugin>
86+
</plugins>
87+
</build>
88+
89+
</project>
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3+
* This file is a part of the ModelEngine Project.
4+
* Licensed under the MIT License. See License.txt in the project root for license information.
5+
*--------------------------------------------------------------------------------------------*/
6+
7+
package modelengine.fit.jade.aipp.file.extract;
8+
9+
import cn.idev.excel.ExcelReader;
10+
import cn.idev.excel.FastExcel;
11+
import cn.idev.excel.context.AnalysisContext;
12+
import cn.idev.excel.converters.Converter;
13+
import cn.idev.excel.enums.CellDataTypeEnum;
14+
import cn.idev.excel.metadata.GlobalConfiguration;
15+
import cn.idev.excel.metadata.data.DataFormatData;
16+
import cn.idev.excel.metadata.data.ReadCellData;
17+
import cn.idev.excel.metadata.property.ExcelContentProperty;
18+
import cn.idev.excel.read.listener.ReadListener;
19+
import cn.idev.excel.read.metadata.ReadSheet;
20+
import cn.idev.excel.util.DateUtils;
21+
import modelengine.fitframework.annotation.Component;
22+
import modelengine.fitframework.annotation.Fitable;
23+
24+
import java.io.BufferedInputStream;
25+
import java.io.File;
26+
import java.io.IOException;
27+
import java.io.InputStream;
28+
import java.math.BigDecimal;
29+
import java.nio.file.Files;
30+
import java.nio.file.Paths;
31+
import java.text.SimpleDateFormat;
32+
import java.util.Date;
33+
import java.util.List;
34+
import java.util.Map;
35+
import java.util.stream.Collectors;
36+
37+
@Component
38+
public class ExcelFileExtractor implements AbstractFileExtractor {
39+
40+
private static String getCellValueAsString(ReadCellData<?> cell) {
41+
switch (cell.getType()) {
42+
case STRING:
43+
return cell.getStringValue();
44+
case NUMBER:
45+
DataFormatData fmt = cell.getDataFormatData();
46+
short formatIndex = fmt.getIndex();
47+
String formatString = fmt.getFormat();
48+
if (DateUtils.isADateFormat(formatIndex, formatString)) {
49+
double value = cell.getNumberValue().doubleValue();
50+
Date date = DateUtils.getJavaDate(value, true);
51+
return new SimpleDateFormat("yyyy-MM-dd").format(date);
52+
} else {
53+
BigDecimal num = cell.getNumberValue();
54+
return num.stripTrailingZeros().toPlainString();
55+
}
56+
case BOOLEAN:
57+
return Boolean.toString(cell.getBooleanValue());
58+
default:
59+
return "";
60+
}
61+
}
62+
63+
@Override
64+
@Fitable(id = "get-fileType-excel")
65+
public FileTypeConstant.FileType supportedFileType() {
66+
return FileTypeConstant.FileType.EXCEL;
67+
}
68+
69+
/**
70+
* 从指定路径的 Excel 文件中提取内容,并返回为字符串形式。
71+
* 实现方式:
72+
* 基于 fast-excel 包,使用流式读取(ReadListener)逐行解析,避免一次性加载整表造成的内存开销。
73+
* 每行数据会被转换为以制表符(\t)分隔的文本,并在行末追加换行符。
74+
* 支持多 sheet 解析,会依次读取工作簿中的每一个 sheet。
75+
*
76+
* @param fileUrl 表示文件路径的 {@link String}.
77+
* @return 表示文件内容的 {@link String}。
78+
* @throws RuntimeException 当文件读取或解析失败时抛出
79+
*/
80+
@Override
81+
@Fitable(id = "extract-file-excel")
82+
public String extractFile(String fileUrl) {
83+
File file = Paths.get(fileUrl).toFile();
84+
StringBuilder excelContent = new StringBuilder();
85+
ReadListener<Map<Integer, String>> listener = new ReadListener<>() {
86+
@Override
87+
public void invoke(Map<Integer, String> data, AnalysisContext context) {
88+
String line = data.entrySet()
89+
.stream()
90+
.sorted(Map.Entry.comparingByKey())
91+
.map(e -> e.getValue() == null ? "" : e.getValue())
92+
.collect(Collectors.joining("\t"));
93+
excelContent.append(line).append('\n');
94+
}
95+
96+
@Override
97+
public void doAfterAllAnalysed(AnalysisContext context) {
98+
}
99+
};
100+
try (InputStream is = new BufferedInputStream(Files.newInputStream(file.toPath()))) {
101+
ExcelReader reader = FastExcel.read(is, listener)
102+
.registerConverter(new CustomCellStringConverter())
103+
.headRowNumber(0)
104+
.build();
105+
106+
List<ReadSheet> sheets = reader.excelExecutor().sheetList();
107+
for (ReadSheet meta : sheets) {
108+
excelContent.append("Sheet ").append(meta.getSheetNo() + 1).append(':').append('\n');
109+
ReadSheet readSheet = FastExcel.readSheet(meta.getSheetNo()).headRowNumber(0).build();
110+
reader.read(readSheet);
111+
}
112+
excelContent.append('\n');
113+
reader.finish(); // 关闭资源
114+
} catch (IOException e) {
115+
throw new RuntimeException(e);
116+
}
117+
return excelContent.toString();
118+
}
119+
120+
/**
121+
* 自定义单元格数据转换器。
122+
* 将 Excel 单元格数据统一转换为字符串,避免数值/日期等类型在读取时格式不一致的问题。
123+
* 缺点:由于采用fast excel包,没有 FORMULA类,会将公式单元格自动计算为值
124+
*/
125+
public static class CustomCellStringConverter implements Converter<String> {
126+
@Override
127+
public Class<String> supportJavaTypeKey() {
128+
return String.class;
129+
}
130+
131+
@Override
132+
public CellDataTypeEnum supportExcelTypeKey() {
133+
return null;
134+
}
135+
136+
@Override
137+
public String convertToJavaData(ReadCellData<?> cellData, ExcelContentProperty contentProperty,
138+
GlobalConfiguration globalConfiguration) {
139+
return getCellValueAsString(cellData);
140+
}
141+
}
142+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
fit:
2+
beans:
3+
packages:
4+
- 'modelengine.fit.jade.aipp.file.extract'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3+
* This file is a part of the ModelEngine Project.
4+
* Licensed under the MIT License. See License.txt in the project root for license information.
5+
*--------------------------------------------------------------------------------------------*/
6+
7+
package modelengine.fit.jade.aipp.file.extract;
8+
9+
import static org.assertj.core.api.Assertions.assertThat;
10+
11+
import modelengine.fitframework.annotation.Fit;
12+
import modelengine.fitframework.test.annotation.FitTestWithJunit;
13+
14+
import org.junit.jupiter.api.Disabled;
15+
import org.junit.jupiter.api.DisplayName;
16+
import org.junit.jupiter.api.Test;
17+
18+
import java.io.File;
19+
20+
@FitTestWithJunit(includeClasses = ExcelFileExtractor.class)
21+
@Disabled
22+
class ExcelFileExtractorTest {
23+
@Fit
24+
ExcelFileExtractor excelFileExtractor;
25+
26+
@Test
27+
@DisplayName("测试获取支持文件类型")
28+
void supportedFileType() {
29+
assertThat(this.excelFileExtractor.supportedFileType()).isEqualTo(FileTypeConstant.FileType.EXCEL);
30+
}
31+
32+
@Test
33+
@DisplayName("测试 excel 文件提取成功")
34+
void extractFile() {
35+
File file = new File(this.getClass().getClassLoader().getResource("file/content.xlsx").getFile());
36+
assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo(
37+
"Sheet 1:\nThis is an excel test\n\n");
38+
}
39+
}
Binary file not shown.
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
<parent>
7+
<groupId>modelengine.fit.jade</groupId>
8+
<artifactId>app-builder-plugin-parent</artifactId>
9+
<version>1.0.0-SNAPSHOT</version>
10+
</parent>
11+
12+
<artifactId>aipp-file-extract-service</artifactId>
13+
14+
<dependencies>
15+
<!-- FIT -->
16+
<dependency>
17+
<groupId>org.fitframework</groupId>
18+
<artifactId>fit-api</artifactId>
19+
</dependency>
20+
<dependency>
21+
<groupId>org.fitframework</groupId>
22+
<artifactId>fit-util</artifactId>
23+
</dependency>
24+
</dependencies>
25+
26+
<build>
27+
<plugins>
28+
<plugin>
29+
<groupId>org.apache.maven.plugins</groupId>
30+
<artifactId>maven-compiler-plugin</artifactId>
31+
<version>${maven.compiler.version}</version>
32+
<configuration>
33+
<source>${java.version}</source>
34+
<target>${java.version}</target>
35+
<encoding>${project.build.sourceEncoding}</encoding>
36+
<compilerArgs>
37+
<arg>-parameters</arg>
38+
</compilerArgs>
39+
</configuration>
40+
</plugin>
41+
<plugin>
42+
<groupId>org.fitframework</groupId>
43+
<artifactId>fit-build-maven-plugin</artifactId>
44+
<version>${fit.version}</version>
45+
<executions>
46+
<execution>
47+
<id>build-service</id>
48+
<goals>
49+
<goal>build-service</goal>
50+
</goals>
51+
</execution>
52+
</executions>
53+
</plugin>
54+
</plugins>
55+
</build>
56+
57+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3+
* This file is a part of the ModelEngine Project.
4+
* Licensed under the MIT License. See License.txt in the project root for license information.
5+
*--------------------------------------------------------------------------------------------*/
6+
7+
package modelengine.fit.jade.aipp.file.extract;
8+
9+
import modelengine.fitframework.annotation.Genericable;
10+
11+
public interface AbstractFileExtractor {
12+
/**
13+
*
14+
* @param fileUrl 文件路径
15+
* @return 表示提取的文件信息的 {@link String}。
16+
*/
17+
@Genericable(id = "extract-file")
18+
String extractFile(String fileUrl);
19+
20+
/**
21+
*
22+
* @return 表示返回的文件枚举类型
23+
*/
24+
@Genericable(id = "get-fileType")
25+
FileTypeConstant.FileType supportedFileType();
26+
27+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3+
* This file is a part of the ModelEngine Project.
4+
* Licensed under the MIT License. See License.txt in the project root for license information.
5+
*--------------------------------------------------------------------------------------------*/
6+
7+
package modelengine.fit.jade.aipp.file.extract;
8+
9+
/**
 * Holder for the file-type constants used by the file extractors.
 * <p>
 * The class only groups constants, so it is final and cannot be instantiated.
 * </p>
 */
public final class FileTypeConstant {
    /**
     * Prevents instantiation of this constants holder.
     */
    private FileTypeConstant() {
    }

    /**
     * Enumeration of file types.
     */
    public enum FileType {
        PDF,
        WORD,
        EXCEL,
        IMAGE,
        AUDIO,
        TXT,
        HTML,
        MARKDOWN,
        CSV
    }
}

0 commit comments

Comments
 (0)