Skip to content

Commit 57e81d9

Browse files
committed
MSOfficeDocumentsParserResolver
1 parent 10d8db8 commit 57e81d9

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,9 @@
1717
package io.jmix.search.index.fileparsing.resolvers;
1818

1919
import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver;
20+
import org.apache.tika.parser.ParseContext;
2021
import org.apache.tika.parser.Parser;
22+
import org.apache.tika.parser.microsoft.OfficeParserConfig;
2123
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
2224
import org.springframework.core.annotation.Order;
2325
import org.springframework.stereotype.Component;
@@ -37,4 +39,15 @@ public Set<String> getSupportedExtensions() {
3739
public Parser getParser() {
3840
return new OOXMLParser();
3941
}
42+
43+
@Override
44+
protected ParseContext getParseContext() {
45+
ParseContext parseContext = super.getParseContext();
46+
47+
OfficeParserConfig officeParserConfig = new OfficeParserConfig();
48+
officeParserConfig.setIncludeHeadersAndFooters(false);
49+
parseContext.set(OfficeParserConfig.class, officeParserConfig);
50+
51+
return parseContext;
52+
}
4053
}

0 commit comments

Comments
 (0)