Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions jablib/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,15 @@ dependencies {
exclude(group = "org.apache.xmlgraphics")
}

// region for document importing
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rework with the new scheme.

Put the concrete versions at https://github.com/JabRef/jabref/blob/main/versions/build.gradle.kts

implementation("org.apache.tika:tika-core:3.2.0") {
exclude(group = "commons-logging")
}
implementation("org.apache.tika:tika-parsers-standard-package:3.2.0") {
exclude(group = "commons-logging")
}
// endregion

// Even if "compileOnly" is used, IntelliJ always adds the dependency to module-info.java. To avoid issues when committing, we use "implementation" instead of "compileOnly".
implementation("io.github.adr:e-adr:2.0.0-SNAPSHOT")

Expand Down
1 change: 1 addition & 0 deletions jablib/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -252,5 +252,6 @@
requires mslinks;
requires org.antlr.antlr4.runtime;
requires org.libreoffice.uno;
requires org.apache.tika.core;
// endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ public static ParserResult fromErrorMessage(String message) {
return parserResult;
}

/**
 * Builds a parser result that holds exactly one entry.
 *
 * @param entry the entry to wrap; {@code List.of} rejects {@code null}
 * @return a new {@link ParserResult} constructed from a one-element list
 */
public static ParserResult fromEntry(BibEntry entry) {
    List<BibEntry> singleEntry = List.of(entry);
    return new ParserResult(singleEntry);
}

private static String getErrorMessage(Exception exception) {
String errorMessage = exception.getLocalizedMessage();
if (exception.getCause() != null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package org.jabref.logic.importer.fileformat.docs;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.time.ZoneId;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.util.Constants;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.FileType;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;

/**
 * General importer for Open Document Format files.
 * <p>
 * The file is parsed with Apache Tika and a single {@link BibEntry} is created from the
 * document metadata (title, contributors and creation year). The document body is not used.
 */
public abstract class OdfImporter extends Importer {
    /**
     * Recognizes the input by its ZIP header only, as checked by {@link Constants#isZip(BufferedReader)}.
     *
     * @param input reader positioned at the start of the file; the header characters are consumed
     * @return {@code true} if the input starts with the ZIP magic number
     * @throws IOException if reading from {@code input} fails
     */
    @Override
    public boolean isRecognizedFormat(BufferedReader input) throws IOException {
        return Constants.isZip(input);
    }

    /**
     * Not supported: the document has to be read as a binary file, not through a character reader.
     *
     * @throws UnsupportedOperationException always; use {@link #importDatabase(Path)} instead
     */
    @Override
    public ParserResult importDatabase(BufferedReader input) throws IOException {
        throw new UnsupportedOperationException("OdfImporter (and descendants) does not support importDatabase(BufferedReader reader). "
                + "Instead use importDatabase(Path filePath).");
    }

    /**
     * Parses the file with Tika and converts its metadata into one entry.
     *
     * @param filePath the document to import
     * @return a parser result containing the single extracted entry
     * @throws IOException if the file cannot be read or Tika fails to parse it
     */
    @Override
    public ParserResult importDatabase(Path filePath) throws IOException {
        try (InputStream inputStream = new FileInputStream(filePath.toFile())) {
            AutoDetectParser parser = new AutoDetectParser();
            Metadata metadata = new Metadata();
            // -1 disables the write limit: the default handler stops at 100,000 characters and
            // signals that with a SAXException, which would make large documents fail to import
            // although only their metadata is needed.
            BodyContentHandler handler = new BodyContentHandler(-1);

            parser.parse(inputStream, handler, metadata);

            BibEntry entry = extractMetadata(metadata);

            return ParserResult.fromEntry(entry);
        } catch (SAXException | TikaException e) {
            throw new IOException("Error parsing file: " + filePath, e);
        }
    }

    /**
     * Maps the Tika metadata to BibTeX fields; fields without metadata are left unset.
     */
    private BibEntry extractMetadata(Metadata metadata) {
        Optional<String> title = Optional.ofNullable(metadata.get("dc:title"));
        Optional<Date> created = Optional.ofNullable(metadata.getDate(Property.internalDate("dcterms:created")));

        List<String> authors = Arrays.asList(metadata.getValues("dc:contributor"));
        Optional<String> joinedAuthors = authors.isEmpty()
                ? Optional.empty()
                : Optional.of(String.join(" and ", authors));

        // Date#getYear is deprecated and returns the year minus 1900, so the calendar year is
        // taken from java.time instead. The system default zone matches what Date#getYear used.
        Optional<String> year = created
                .map(date -> date.toInstant().atZone(ZoneId.systemDefault()).getYear())
                .map(String::valueOf);

        return new BibEntry()
                .withField(StandardField.TITLE, title)
                .withField(StandardField.AUTHOR, joinedAuthors)
                .withField(StandardField.YEAR, year);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.jabref.logic.importer.fileformat.docs;

import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.FileType;
import org.jabref.logic.util.StandardFileType;

/**
 * Importer for OpenDocument Impress (ODP) files.
 * <p>
 * This class only supplies the id, name, description and file type; the import logic
 * comes from {@link OdfImporter}.
 */
public class OdpImporter extends OdfImporter {

    /**
     * @return {@link StandardFileType#ODP}
     */
    @Override
    public FileType getFileType() {
        return StandardFileType.ODP;
    }

    /**
     * @return the identifier {@code "odp"}
     */
    @Override
    public String getId() {
        return "odp";
    }

    /**
     * @return the (non-localized) format name
     */
    @Override
    public String getName() {
        return "OpenDocument Impress";
    }

    /**
     * @return the localized description of this importer
     */
    @Override
    public String getDescription() {
        return Localization.lang("Importer for OpenDocument Impress (ODP) files");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.jabref.logic.importer.fileformat.docs;

import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.FileType;
import org.jabref.logic.util.StandardFileType;

/**
 * Importer for OpenDocument Calc (ODS) files.
 * <p>
 * This class only supplies the id, name, description and file type; the import logic
 * comes from {@link OdfImporter}.
 */
public class OdsImporter extends OdfImporter {

    /**
     * @return {@link StandardFileType#ODS}
     */
    @Override
    public FileType getFileType() {
        return StandardFileType.ODS;
    }

    /**
     * @return the identifier {@code "ods"}
     */
    @Override
    public String getId() {
        return "ods";
    }

    /**
     * @return the (non-localized) format name
     */
    @Override
    public String getName() {
        return "OpenDocument Calc";
    }

    /**
     * @return the localized description of this importer
     */
    @Override
    public String getDescription() {
        return Localization.lang("Importer for OpenDocument Calc (ODS) files");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.jabref.logic.importer.fileformat.docs;

import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.FileType;
import org.jabref.logic.util.StandardFileType;

/**
 * Importer for OpenDocument Text (ODT) files.
 * <p>
 * This class only supplies the id, name, description and file type; the import logic
 * comes from {@link OdfImporter}.
 */
public class OdtImporter extends OdfImporter {

    /**
     * @return {@link StandardFileType#ODT}
     */
    @Override
    public FileType getFileType() {
        return StandardFileType.ODT;
    }

    /**
     * @return the identifier {@code "odt"}
     */
    @Override
    public String getId() {
        return "odt";
    }

    /**
     * @return the (non-localized) format name
     */
    @Override
    public String getName() {
        return "OpenDocument Writer";
    }

    /**
     * @return the localized description of this importer
     */
    @Override
    public String getDescription() {
        return Localization.lang("Importer for OpenDocument Writer (ODT) files");
    }
}
30 changes: 30 additions & 0 deletions jablib/src/main/java/org/jabref/logic/importer/util/Constants.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.jabref.logic.importer.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
 * Shared constants and helpers for importers that work on ZIP-based document formats.
 */
public class Constants {
    /**
     * The first four characters of a ZIP local file header ("PK" followed by 0x03 0x04).
     * The array is public and therefore mutable; callers must not modify it.
     */
    public static final char[] ZIP_HEADER_MAGIC_NUMBER = {0x50, 0x4b, 0x03, 0x04};

    /**
     * File extensions (including the leading dot) of formats that are ZIP containers.
     */
    public static final List<String> ZIP_FILES_EXTENSIONS = List.of(
            ".ctv6bak",
            ".zip",
            ".epub",
            ".odt",
            ".docx",
            ".xlsx",
            ".pptx",
            ".ods",
            ".odp"
    );

    public static final String DC_NAMESPACE = "http://purl.org/dc/elements/1.1/";

    /**
     * Checks whether the input starts with the ZIP magic number.
     * <p>
     * Up to {@code ZIP_HEADER_MAGIC_NUMBER.length} characters are consumed from {@code input}.
     *
     * @param input reader positioned at the start of the content to test
     * @return {@code true} if the first characters equal {@link #ZIP_HEADER_MAGIC_NUMBER};
     *         {@code false} if they differ or the input ends before the header is complete
     * @throws IOException if reading from {@code input} fails
     */
    public static boolean isZip(BufferedReader input) throws IOException {
        char[] header = new char[ZIP_HEADER_MAGIC_NUMBER.length];
        int filled = 0;
        // A single read may deliver fewer characters than requested even though more are
        // still to come, so keep reading until the header is complete or the stream ends.
        while (filled < header.length) {
            int count = input.read(header, filled, header.length - filled);
            if (count < 0) {
                return false;
            }
            filled += count;
        }
        return Arrays.equals(header, ZIP_HEADER_MAGIC_NUMBER);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public enum StandardFileType implements FileType {
MARKDOWN("Markdown", "md"),
MEDLINE("Medline", "nbib", "xml"),
MEDLINE_PLAIN("Medline Plain", "nbib", "txt"),
ODP("OpenOffice Impress", "odp"),
ODS("OpenOffice Calc", "ods"),
ODT("OpenOffice Writer", "odt"),
PDF("PDF", "pdf"),
PUBMED("Pubmed", "fcgi"),
RDF("RDF", "rdf"),
Expand Down
8 changes: 8 additions & 0 deletions jablib/src/main/java/org/jabref/model/entry/BibEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,14 @@ public BibEntry withField(Field field, String value) {
return this;
}

/**
 * Sets the field only if a value is present; an empty {@code Optional} leaves this entry untouched.
 * After setting the field, {@code setChanged(false)} is called.
 *
 * @param field the field to set
 * @param value the value to store, if any
 * @return this entry, to allow chaining
 */
public BibEntry withField(Field field, Optional<String> value) {
    if (value.isPresent()) {
        setField(field, value.get());
        this.setChanged(false);
    }
    return this;
}

/**
* A copy is made of the parameter
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.jabref.logic.importer.fileformat.docs;

import java.io.IOException;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Stream;

import org.jabref.logic.importer.ImportException;
import org.jabref.logic.importer.fileformat.ImporterTestEngine;
import org.jabref.logic.importer.util.Constants;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * File-based tests for {@link OdpImporter}: format recognition and entry import
 * against the test files provided by {@link ImporterTestEngine}.
 */
public class OdpImporterFilesTest {
    private static final String FILE_ENDING = ".odp";
    private static final String TEST_FILE_PREFIX = "OdpImporterTest";

    /** All known ZIP-based extensions except the one under test. */
    private static final List<String> EXCLUDE_EXTENSIONS = Constants.ZIP_FILES_EXTENSIONS
            .stream()
            .filter(Predicate.not(FILE_ENDING::equals))
            .toList();

    private OdpImporter importer;

    @BeforeEach
    void setUp() {
        importer = new OdpImporter();
    }

    /** Tells whether the file name ends with one of the excluded ZIP-based extensions. */
    private static boolean hasExcludedExtension(String name) {
        for (String extension : EXCLUDE_EXTENSIONS) {
            if (name.endsWith(extension)) {
                return true;
            }
        }
        return false;
    }

    /** Test files that belong to this importer. */
    private static Stream<String> fileNames() throws IOException {
        Predicate<String> isOdpTestFile = name -> name.startsWith(TEST_FILE_PREFIX) && name.endsWith(FILE_ENDING);
        return ImporterTestEngine.getTestFiles(isOdpTestFile).stream();
    }

    /** Test files of other importers, leaving out the other ZIP-based formats. */
    private static Stream<String> invalidFileNames() throws IOException {
        Predicate<String> isForeignNonZipFile = name -> !name.startsWith(TEST_FILE_PREFIX) && !hasExcludedExtension(name);
        return ImporterTestEngine.getTestFiles(isForeignNonZipFile).stream();
    }

    @ParameterizedTest
    @MethodSource("fileNames")
    void isRecognizedFormat(String fileName) throws IOException {
        ImporterTestEngine.testIsRecognizedFormat(importer, fileName);
    }

    @ParameterizedTest
    @MethodSource("invalidFileNames")
    void isNotRecognizedFormat(String fileName) throws IOException {
        ImporterTestEngine.testIsNotRecognizedFormat(importer, fileName);
    }

    @ParameterizedTest
    @MethodSource("fileNames")
    void importEntries(String fileName) throws ImportException, IOException {
        ImporterTestEngine.testImportEntries(importer, fileName, FILE_ENDING);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package org.jabref.logic.importer.fileformat.docs;

import org.jabref.logic.util.StandardFileType;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Checks the fixed identifying values reported by {@link OdpImporter}.
 */
class OdpImporterTest {
    private OdpImporter importer;

    @BeforeEach
    void setUp() {
        importer = new OdpImporter();
    }

    @Test
    void getFormatName() {
        String actualName = importer.getName();
        assertEquals("OpenDocument Impress", actualName);
    }

    @Test
    void getCLIId() {
        String actualId = importer.getId();
        assertEquals("odp", actualId);
    }

    @Test
    void sGetExtensions() {
        // Despite the method name, this compares the file type, not a list of extensions.
        assertEquals(StandardFileType.ODP, importer.getFileType());
    }
}
Loading