Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions dkpro-core-io-pubannotation-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,10 @@
<artifactId>dkpro-core-api-ner-asl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.io.conll-asl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.core.io.pubannotation;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.core.io.pubannotation.internal.GenericPubAnnotation2DKPro;
import org.dkpro.core.io.pubannotation.internal.model.PADocument;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.ObjectMapper;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;

/**
 * Reader for the PubAnnotation format.
 *
 * <p>
 * Since the PubAnnotation format only associates spans/relations with simple values and since
 * annotations are not typed, it is necessary to define target types and features via
 * {@link #PARAM_SPAN_TYPE} and {@link #PARAM_SPAN_LABEL_FEATURE}. In PubAnnotation, every
 * annotation has an ID. If the target type has a suitable feature to retain the ID, it can be
 * configured via {@link #PARAM_SPAN_ID_FEATURE}.
 * </p>
 *
 * <p>
 * The {@code sourcedb} and {@code sourceid} from the PubAnnotation document are imported as
 * {@link DocumentMetaData#setCollectionId(String) collectionId} and
 * {@link DocumentMetaData#setDocumentId(String) documentId} respectively. If present, also the
 * {@code target} is imported as {@link DocumentMetaData#setDocumentUri(String) documentUri}. The
 * {@link DocumentMetaData#setDocumentBaseUri(String) documentBaseUri} is cleared in this case.
 * </p>
 *
 * <p>
 * Currently supports only span annotations, i.e. no relations or modifications. Discontinuous
 * segments are also not supported.
 * </p>
 *
 * @see <a href="http://www.pubannotation.org/docs/annotation-format/">PubAnnotation format</a>
 */
@ResourceMetaData(name = "PubAnnotation Reader")
@MimeTypeCapability({MimeTypes.APPLICATION_X_PUB_ANNOTATION_JSON})
@TypeCapability(
        outputs = {
                "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" })
public class GenericPubAnnotationReader
    extends JCasResourceCollectionReader_ImplBase
{
    /**
     * The span annotation type to which the PubAnnotation spans are mapped.
     */
    public static final String PARAM_SPAN_TYPE = "spanType";
    @ConfigurationParameter(name = PARAM_SPAN_TYPE, mandatory = true)
    private String spanType;

    /**
     * The feature on the span annotation type which receives the ID. This parameter is optional.
     */
    public static final String PARAM_SPAN_ID_FEATURE = "spanIdFeature";
    @ConfigurationParameter(name = PARAM_SPAN_ID_FEATURE, mandatory = false)
    private String spanIdFeature;

    /**
     * The feature on the span annotation type which receives the label. This parameter is
     * optional.
     */
    public static final String PARAM_SPAN_LABEL_FEATURE = "spanLabelFeature";
    @ConfigurationParameter(name = PARAM_SPAN_LABEL_FEATURE, mandatory = false)
    private String spanLabelFeature;

    /**
     * Whether namespaces should be resolved during the conversion. The value is handed to the
     * converter unchanged via {@code GenericPubAnnotation2DKPro#setResolveNamespaces}. Disabled
     * by default.
     */
    public static final String PARAM_RESOLVE_NAMESPACES = "resolveNamespaces";
    @ConfigurationParameter(name = PARAM_RESOLVE_NAMESPACES, mandatory = true, defaultValue = "false")
    private boolean resolveNamespaces;

    /** JSON mapper used to parse the PubAnnotation documents; created in {@link #initialize}. */
    private ObjectMapper mapper;

    /**
     * Initializes the reader and creates the JSON mapper. The mapper is configured to accept
     * single-quoted strings in addition to standard double-quoted JSON strings.
     *
     * @param aContext
     *            the UIMA context providing the configuration of this reader.
     * @throws ResourceInitializationException
     *             if the initialization of the super class fails.
     */
    @Override
    public void initialize(UimaContext aContext)
        throws ResourceInitializationException
    {
        super.initialize(aContext);

        mapper = new ObjectMapper();
        // Hack because LXF dumper presently creates invalid JSON
        // NOTE(review): the mention of the LXF dumper looks carried over from another reader -
        // confirm whether lenient single-quote parsing is actually required for PubAnnotation.
        mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
    }

    /**
     * Reads the next PubAnnotation document into the given CAS. The CAS is first initialized
     * from the resource via {@code initCas}, then the JSON content of the resource is parsed into
     * a {@link PADocument} and converted using the configured span mapping.
     *
     * @param aCAS
     *            the CAS to fill.
     * @throws IOException
     *             if the resource cannot be read or parsed.
     * @throws CollectionException
     *             declared by the reader contract; not raised directly by this method.
     */
    @Override
    public void getNext(JCas aCAS)
        throws IOException, CollectionException
    {
        Resource res = nextFile();
        initCas(aCAS, res);

        // A fresh converter per document, set up from the reader parameters
        GenericPubAnnotation2DKPro converter = new GenericPubAnnotation2DKPro();
        converter.setSpanMapping(spanType, spanIdFeature, spanLabelFeature);
        converter.setResolveNamespaces(resolveNamespaces);

        try (InputStream is = new BufferedInputStream(res.getInputStream())) {
            PADocument doc = mapper.readValue(is, PADocument.class);
            converter.convert(doc, aCAS);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/package org.dkpro.core.io.pubannotation;

import java.io.OutputStream;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.core.io.pubannotation.internal.GenericDKPro2PubAnnotation;
import org.dkpro.core.io.pubannotation.internal.model.PADocument;

import com.fasterxml.jackson.databind.ObjectMapper;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;

/**
 * Writer for the PubAnnotation format.
 *
 * <p>
 * The {@code sourcedb} and {@code sourceid} of the PubAnnotation document are exported from
 * {@link DocumentMetaData#setCollectionId(String) collectionId} and
 * {@link DocumentMetaData#setDocumentId(String) documentId} respectively. The {@code target} is
 * exported from {@link DocumentMetaData#setDocumentUri(String) documentUri}.
 * </p>
 *
 * <p>
 * Currently supports only span annotations, i.e. no relations or modifications. Discontinuous
 * segments are also not supported.
 * </p>
 *
 * @see <a href="http://www.pubannotation.org/docs/annotation-format/">PubAnnotation format</a>
 */
@ResourceMetaData(name = "PubAnnotation Writer")
@MimeTypeCapability({MimeTypes.APPLICATION_X_PUB_ANNOTATION_JSON})
@TypeCapability(
        inputs = {
                "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" })
public class GenericPubAnnotationWriter
    extends JCasFileWriter_ImplBase
{
    /**
     * Specify the suffix of output files. Default value <code>.json</code>. If the suffix is not
     * needed, provide an empty string as value.
     */
    public static final String PARAM_FILENAME_EXTENSION =
            ComponentParameters.PARAM_FILENAME_EXTENSION;
    @ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".json")
    private String filenameSuffix;

    /** JSON mapper used to serialize the documents; created in {@link #initialize}. */
    private ObjectMapper mapper;

    /**
     * Initializes the writer and creates the JSON mapper used for serialization.
     *
     * @param aContext
     *            the UIMA context providing the configuration of this writer.
     * @throws ResourceInitializationException
     *             if the initialization of the super class fails.
     */
    @Override
    public void initialize(UimaContext aContext)
        throws ResourceInitializationException
    {
        super.initialize(aContext);

        mapper = new ObjectMapper();
    }

    /**
     * Converts the given CAS into a {@link PADocument} and writes it as pretty-printed JSON to
     * the output stream obtained for the CAS using the configured filename suffix.
     *
     * @param aJCas
     *            the CAS to export.
     * @throws AnalysisEngineProcessException
     *             wrapping any exception raised while opening the output stream or writing.
     */
    @Override
    public void process(JCas aJCas)
        throws AnalysisEngineProcessException
    {
        // Fill an empty PubAnnotation document from the CAS content
        PADocument paDocument = new PADocument();
        new GenericDKPro2PubAnnotation().convert(aJCas, paDocument);

        try (OutputStream out = getOutputStream(aJCas, filenameSuffix)) {
            mapper.writerWithDefaultPrettyPrinter().writeValue(out, paDocument);
        }
        catch (Exception ex) {
            throw new AnalysisEngineProcessException(ex);
        }
    }
}
Loading