From 9fbaadfad02344567fd9689af9439d39fc22f43c Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Fri, 16 Aug 2024 14:14:08 -0500 Subject: [PATCH 01/11] TIKA-4272: start of some fixes for tika grpc so that it works in tika-docker. plugins were stepping on each other's toes with classpath before. --- tika-core/pom.xml | 6 + .../apache/tika/pipes/fetcher/Fetcher.java | 4 +- tika-fuzzing/pom.xml | 8 +- tika-grpc/pom.xml | 6 + .../tika/pipes/grpc/TikaGrpcServer.java | 29 ++- .../tika/pipes/grpc/TikaGrpcServerImpl.java | 26 ++- tika-grpc/src/main/proto/tika.proto | 12 ++ .../tika/pipes/grpc/TikaGrpcServerTest.java | 5 +- .../test/resources/tika-pipes-test-config.xml | 8 +- tika-integration-tests/pom.xml | 6 + tika-parent/pom.xml | 7 + tika-pipes/tika-fetchers/pom.xml | 68 ++++++- .../tika-fetcher-az-blob/pom.xml | 21 +- .../fetcher/azblob/AZBlobFetcherPlugin.java | 42 ++++ .../src/main/resources/plugin.properties | 21 ++ .../tika-fetchers/tika-fetcher-fs/pom.xml | 104 ++++++++++ .../pipes/fetcher/fs/FileSystemFetcher.java | 182 ++++++++++++++++++ .../fetcher/fs/FileSystemFetcherPlugin.java | 42 ++++ .../fs/config/FileSystemFetcherConfig.java | 42 ++++ .../src/main/resources/plugin.properties | 21 ++ .../tika-fetchers/tika-fetcher-gcs/pom.xml | 8 +- .../pipes/fetcher/gcs/GCSFetcherPlugin.java | 42 ++++ .../src/main/resources/plugin.properties | 21 ++ .../tika-fetchers/tika-fetcher-http/pom.xml | 23 --- .../pipes/fetcher/http/HttpFetcherPlugin.java | 42 ++++ .../src/main/resources/plugin.properties | 21 ++ .../tika-fetcher-microsoft-graph/pom.xml | 20 +- .../microsoftgraph/MicrosoftGraphFetcher.java | 2 + .../microsoftgraph/MicrosoftGraphPlugin.java | 42 ++++ .../src/main/resources/plugin.properties | 21 ++ .../tika-fetchers/tika-fetcher-s3/pom.xml | 13 +- .../pipes/fetcher/s3/S3FetcherPlugin.java | 42 ++++ .../src/main/resources/plugin.properties | 21 ++ .../tika-fetchers/tika-fetcher-url/pom.xml | 101 ++++++++++ .../tika/pipes/fetcher/url/UrlFetcher.java | 53 +++++ 
.../fetcher/url/config/UrlFetcherConfig.java | 23 +++ .../src/main/resources/plugin.properties | 21 ++ tika-server/tika-server-core/pom.xml | 7 +- 38 files changed, 1090 insertions(+), 93 deletions(-) create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcherPlugin.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/resources/plugin.properties create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/resources/plugin.properties create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcherPlugin.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/resources/plugin.properties create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcherPlugin.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-http/src/main/resources/plugin.properties create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphPlugin.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/resources/plugin.properties create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3FetcherPlugin.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/resources/plugin.properties create mode 100644 
tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-url/src/main/resources/plugin.properties diff --git a/tika-core/pom.xml b/tika-core/pom.xml index 7e163c061c..172552a174 100644 --- a/tika-core/pom.xml +++ b/tika-core/pom.xml @@ -40,6 +40,12 @@ org.slf4j slf4j-api + + org.pf4j + pf4j + + provided + commons-io commons-io diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java index 8f7a186fd5..c7e1b3d43d 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java @@ -19,6 +19,8 @@ import java.io.IOException; import java.io.InputStream; +import org.pf4j.ExtensionPoint; + import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -30,7 +32,7 @@ *

* Implementations of Fetcher must be thread safe. */ -public interface Fetcher { +public interface Fetcher extends ExtensionPoint { String getName(); diff --git a/tika-fuzzing/pom.xml b/tika-fuzzing/pom.xml index 2faa23ce74..fdf94626c2 100644 --- a/tika-fuzzing/pom.xml +++ b/tika-fuzzing/pom.xml @@ -87,6 +87,12 @@ test-jar test + + + org.pf4j + pf4j + provided + @@ -133,4 +139,4 @@ - \ No newline at end of file + diff --git a/tika-grpc/pom.xml b/tika-grpc/pom.xml index d7e40d7483..6cd06e9f30 100644 --- a/tika-grpc/pom.xml +++ b/tika-grpc/pom.xml @@ -41,6 +41,7 @@ 3.0.0 true + 3.12.0 @@ -226,6 +227,11 @@ com.fasterxml.jackson.module jackson-module-jsonSchema + + org.pf4j + pf4j + ${pf4j.version} + com.asarkar.grpc grpc-test diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java index 506522c740..2ef816d3a1 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java @@ -21,6 +21,8 @@ import java.io.File; import java.io.FileWriter; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.List; import java.util.concurrent.TimeUnit; import com.beust.jcommander.JCommander; @@ -32,11 +34,15 @@ import io.grpc.TlsServerCredentials; import io.grpc.protobuf.services.HealthStatusManager; import io.grpc.protobuf.services.ProtoReflectionService; +import org.pf4j.DefaultPluginManager; +import org.pf4j.PluginManager; +import org.pf4j.PluginWrapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.tika.config.TikaConfig; import org.apache.tika.config.TikaConfigSerializer; +import org.apache.tika.pipes.fetcher.Fetcher; /** * Server that manages startup/shutdown of the GRPC Tika server. 
@@ -45,12 +51,17 @@ public class TikaGrpcServer { private static final Logger LOGGER = LoggerFactory.getLogger(TikaGrpcServer.class); public static final int TIKA_SERVER_GRPC_DEFAULT_PORT = 50052; private Server server; + // create the plugin manager + private PluginManager pluginManager; @Parameter(names = {"-p", "--port"}, description = "The grpc server port", help = true) private Integer port = TIKA_SERVER_GRPC_DEFAULT_PORT; - @Parameter(names = {"-c", "--config"}, description = "The grpc server port", help = true) + @Parameter(names = {"-c", "--config"}, description = "The grpc server configuration XML file", help = true) private File tikaConfigXml; + @Parameter(names = {"-d", "--plugins-dir"}, description = "Tika pipes plugin root directories", help = true) + private List pluginDirs; + @Parameter(names = {"-s", "--secure"}, description = "Enable credentials required to access this grpc server") private boolean secure; @@ -95,11 +106,25 @@ public void start() throws Exception { TikaConfigSerializer.serialize(new TikaConfig(), TikaConfigSerializer.Mode.STATIC_FULL, fw, StandardCharsets.UTF_8); } } + pluginManager = pluginDirs == null ? 
new DefaultPluginManager() : new DefaultPluginManager(pluginDirs); + pluginManager.loadPlugins(); + LOGGER.info("Loaded {} plugins", pluginManager.getPlugins().size()); + pluginManager.startPlugins(); + for (PluginWrapper plugin : pluginManager.getStartedPlugins()) { + LOGGER.info("Add-in " + plugin.getPluginId() + " : " + plugin.getDescriptor() + " has started."); + for (Class extension : pluginManager.getExtensionClasses(plugin.getPluginId())) { + LOGGER.info(" Extension " + extension + " has been registered -- {}", extension.isAssignableFrom(Fetcher.class)); + LOGGER.info(" or -- {}", Fetcher.class.isAssignableFrom(extension)); + } + } + for (PluginWrapper plugin : pluginManager.getUnresolvedPlugins()) { + LOGGER.warn("Add-in " + plugin.getPluginId() + " : " + plugin.getDescriptor() + " is unresolved."); + } File tikaConfigFile = new File(tikaConfigXml.getAbsolutePath()); healthStatusManager.setStatus(TikaGrpcServer.class.getSimpleName(), ServingStatus.SERVING); server = Grpc .newServerBuilderForPort(port, creds) - .addService(new TikaGrpcServerImpl(tikaConfigFile.getAbsolutePath())) + .addService(new TikaGrpcServerImpl(tikaConfigFile.getAbsolutePath(), pluginManager)) .addService(healthStatusManager.getHealthService()) .addService(ProtoReflectionService.newInstance()) .build() diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java index 4eb5f0b010..d65178e8cf 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java @@ -45,6 +45,7 @@ import io.grpc.stub.StreamObserver; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; +import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; @@ -59,6 +60,8 @@ import org.apache.tika.GetFetcherConfigJsonSchemaRequest; 
import org.apache.tika.GetFetcherReply; import org.apache.tika.GetFetcherRequest; +import org.apache.tika.ListFetcherPluginsReply; +import org.apache.tika.ListFetcherPluginsRequest; import org.apache.tika.ListFetchersReply; import org.apache.tika.ListFetchersRequest; import org.apache.tika.SaveFetcherReply; @@ -76,6 +79,7 @@ import org.apache.tika.pipes.emitter.EmitKey; import org.apache.tika.pipes.fetcher.AbstractFetcher; import org.apache.tika.pipes.fetcher.FetchKey; +import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.config.AbstractConfig; import org.apache.tika.pipes.fetcher.config.FetcherConfigContainer; @@ -87,6 +91,8 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { } public static final JsonSchemaGenerator JSON_SCHEMA_GENERATOR = new JsonSchemaGenerator(OBJECT_MAPPER); + private final PluginManager pluginManager; + /** * FetcherID is key, The pair is the Fetcher object and the Metadata */ @@ -96,9 +102,8 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { String tikaConfigPath; - TikaGrpcServerImpl(String tikaConfigPath) - throws TikaConfigException, IOException, ParserConfigurationException, - TransformerException, SAXException { + TikaGrpcServerImpl(String tikaConfigPath, PluginManager pluginManager) throws TikaConfigException, IOException, + ParserConfigurationException, TransformerException, SAXException { File tikaConfigFile = new File(tikaConfigPath); if (!tikaConfigFile.canWrite()) { File tmpTikaConfigFile = File.createTempFile("configCopy", tikaConfigFile.getName()); @@ -114,8 +119,15 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { expiringFetcherStore = new ExpiringFetcherStore(pipesConfig.getStaleFetcherTimeoutSeconds(), pipesConfig.getStaleFetcherDelaySeconds()); + this.tikaConfigPath = tikaConfigPath; updateTikaConfig(); + + this.pluginManager = pluginManager; + List fetchers = pluginManager.getExtensions(Fetcher.class); + for (Fetcher fetcher : fetchers) { + + } } private void 
updateTikaConfig() @@ -409,6 +421,14 @@ public void getFetcherConfigJsonSchema(GetFetcherConfigJsonSchemaRequest request responseObserver.onCompleted(); } + @Override + public void listFetcherPlugins(ListFetcherPluginsRequest request, StreamObserver responseObserver) { + for (Fetcher extension : pluginManager.getExtensions(Fetcher.class)) { + responseObserver.onNext(ListFetcherPluginsReply.newBuilder().setFetcherPluginId(extension.getName()).build()); + } + + } + private boolean deleteFetcher(String fetcherName) { return expiringFetcherStore.deleteFetcher(fetcherName); } diff --git a/tika-grpc/src/main/proto/tika.proto b/tika-grpc/src/main/proto/tika.proto index 572ded7abd..8019ca9195 100644 --- a/tika-grpc/src/main/proto/tika.proto +++ b/tika-grpc/src/main/proto/tika.proto @@ -59,6 +59,11 @@ service Tika { Get the Fetcher Config schema for a given fetcher class. */ rpc GetFetcherConfigJsonSchema(GetFetcherConfigJsonSchemaRequest) returns (GetFetcherConfigJsonSchemaReply) {} + /* + List fetcher plugins + */ + rpc ListFetcherPlugins(ListFetcherPluginsRequest) returns (ListFetcherPluginsReply) {} + } message SaveFetcherRequest { @@ -143,3 +148,10 @@ message GetFetcherConfigJsonSchemaReply { // The json schema that describes the fetcher config in string format. 
string fetcher_config_json_schema = 1; } + +message ListFetcherPluginsRequest { +} + +message ListFetcherPluginsReply { + string fetcher_plugin_id = 1; +} diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java index 80f391e33b..d5aebed67f 100644 --- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java +++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java @@ -52,6 +52,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.pf4j.DefaultPluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -93,7 +94,7 @@ public void testFetcherCrud(Resources resources) throws Exception { Server server = InProcessServerBuilder .forName(serverName) .directExecutor() - .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath())) + .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), new DefaultPluginManager())) .build() .start(); resources.register(server, Duration.ofSeconds(10)); @@ -188,7 +189,7 @@ public void testBiStream(Resources resources) throws Exception { Server server = InProcessServerBuilder .forName(serverName) .directExecutor() - .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath())) + .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), new DefaultPluginManager())) .build() .start(); resources.register(server, Duration.ofSeconds(10)); diff --git a/tika-grpc/src/test/resources/tika-pipes-test-config.xml b/tika-grpc/src/test/resources/tika-pipes-test-config.xml index e4006edb35..e7f4240c38 100644 --- a/tika-grpc/src/test/resources/tika-pipes-test-config.xml +++ b/tika-grpc/src/test/resources/tika-pipes-test-config.xml @@ -13,8 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. ---> - +--> 600 60 @@ -30,6 +29,5 @@ -1 - - - + + \ No newline at end of file diff --git a/tika-integration-tests/pom.xml b/tika-integration-tests/pom.xml index 5d641d852c..620243c894 100644 --- a/tika-integration-tests/pom.xml +++ b/tika-integration-tests/pom.xml @@ -58,6 +58,12 @@ junit-vintage-engine test + + + org.pf4j + pf4j + provided + diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml index 3145cf7d55..d4175450ce 100644 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@ -445,6 +445,7 @@ 1.5.6-4 9.40 1.5.10 + 3.12.0 @@ -1035,6 +1036,12 @@ jspecify 1.0.0 + + org.pf4j + pf4j + ${pf4j.version} + + diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml index 23b9d73f17..fa86d9db5f 100644 --- a/tika-pipes/tika-fetchers/pom.xml +++ b/tika-pipes/tika-fetchers/pom.xml @@ -37,12 +37,76 @@ tika-fetcher-gcs tika-fetcher-az-blob tika-fetcher-microsoft-graph + tika-fetcher-fs + tika-fetcher-url - + + org.pf4j + pf4j + + provided + + + org.apache.logging.log4j + log4j-core + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + org.slf4j + jcl-over-slf4j + + + org.apache.logging.log4j + log4j-core + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + org.slf4j + jcl-over-slf4j + + + commons-io + commons-io + + + ${project.groupId} + tika-core + ${project.version} + provided + + + ${project.groupId} + tika-core + ${project.version} + test-jar + test + + + ${project.groupId} + tika-serialization + ${project.version} + test + + + org.mockito + mockito-core + test + + + org.junit.jupiter + junit-jupiter-engine + test + 3.0.0-BETA2-rc1 - \ No newline at end of file + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml index f0d7642e52..903007cf2e 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml 
@@ -29,29 +29,10 @@ Apache Tika Azure Blob fetcher - - ${project.groupId} - tika-core - ${project.version} - provided - com.azure azure-storage-blob - - ${project.groupId} - tika-core - ${project.version} - test-jar - test - - - ${project.groupId} - tika-serialization - ${project.version} - test - @@ -123,4 +104,4 @@ 3.0.0-BETA2-rc1 - \ No newline at end of file + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcherPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcherPlugin.java new file mode 100644 index 0000000000..5ae15613ad --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcherPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.fetcher.azblob; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AZBlobFetcherPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(AZBlobFetcherPlugin.class); + @Override + public void start() { + LOG.info("Starting"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting"); + super.delete(); + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/resources/plugin.properties new file mode 100644 index 0000000000..74dfeaadb6 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +plugin.id=az-blob-fetcher +plugin.class=org.apache.tika.pipes.fetcher.azblob.AZBlobFetcherPlugin +plugin.version=3.0.0-SNAPSHOT +plugin.provider=Azure Blob Fetcher +plugin.description=Capable of taking Blob IDs from AZ and using their bytes as tika parse bytes. 
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml new file mode 100644 index 0000000000..e4bb74b24a --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml @@ -0,0 +1,104 @@ + + + + + tika-fetchers + org.apache.tika + 3.0.0-SNAPSHOT + + 4.0.0 + + tika-fetcher-fs + Apache Tika FS Fetcher + Apache Tika Pipes Fetcher for Local File System + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.tika.pipes.fetcher.s3 + + + + + + + test-jar + + + + + + maven-shade-plugin + ${maven.shade.version} + + + package + + shade + + + + false + + + + + *:* + + META-INF/* + LICENSE.txt + NOTICE.txt + + + + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + META-INF/NOTICE + target/classes/META-INF/NOTICE + + + META-INF/DEPENDENCIES + target/classes/META-INF/DEPENDENCIES + + + + + + + + + + + + 3.0.0-BETA2-rc1 + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java new file mode 100644 index 0000000000..bc3c4cddd3 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.fs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.FileTime; +import java.util.Date; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.tika.config.Field; +import org.apache.tika.config.Initializable; +import org.apache.tika.config.InitializableProblemHandler; +import org.apache.tika.config.Param; +import org.apache.tika.exception.TikaConfigException; +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.FileSystem; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.pipes.fetcher.AbstractFetcher; +import org.apache.tika.pipes.fetcher.fs.config.FileSystemFetcherConfig; + +public class FileSystemFetcher extends AbstractFetcher implements Initializable { + public FileSystemFetcher() { + } + + public FileSystemFetcher(FileSystemFetcherConfig fileSystemFetcherConfig) { + setBasePath(fileSystemFetcherConfig.getBasePath()); + setExtractFileSystemMetadata(fileSystemFetcherConfig.isExtractFileSystemMetadata()); + } + + private static final Logger LOG = LoggerFactory.getLogger(FileSystemFetcher.class); + + //Warning! 
basePath can be null! + private Path basePath = null; + + private boolean extractFileSystemMetadata = false; + + static boolean isDescendant(Path root, Path descendant) { + return descendant.toAbsolutePath().normalize() + .startsWith(root.toAbsolutePath().normalize()); + } + + @Override + public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws IOException, TikaException { + if (fetchKey.contains("\u0000")) { + throw new IllegalArgumentException("Path must not contain 'u0000'. " + + "Please review the life decisions that led you to requesting " + + "a file name with this character in it."); + } + Path p = null; + if (basePath != null) { + p = basePath.resolve(fetchKey); + if (!p.toRealPath().startsWith(basePath.toRealPath())) { + throw new IllegalArgumentException( + "fetchKey must resolve to be a descendant of the 'basePath'"); + } + } else { + p = Paths.get(fetchKey); + } + + metadata.set(TikaCoreProperties.SOURCE_PATH, fetchKey); + updateFileSystemMetadata(p, metadata); + + if (!Files.isRegularFile(p)) { + if (basePath != null && !Files.isDirectory(basePath)) { + throw new IOException("BasePath is not a directory: " + basePath); + } else { + throw new FileNotFoundException(p.toAbsolutePath().toString()); + } + } + + return TikaInputStream.get(p, metadata); + } + + private void updateFileSystemMetadata(Path p, Metadata metadata) throws IOException { + if (! extractFileSystemMetadata) { + return; + } + BasicFileAttributes attrs = Files.readAttributes(p, BasicFileAttributes.class); + updateFileTime(FileSystem.CREATED, attrs.creationTime(), metadata); + updateFileTime(FileSystem.MODIFIED, attrs.lastModifiedTime(), metadata); + updateFileTime(FileSystem.ACCESSED, attrs.lastAccessTime(), metadata); + //TODO extract owner or group? 
+ } + + private void updateFileTime(Property property, FileTime fileTime, Metadata metadata) { + if (fileTime == null) { + return; + } + metadata.set(property, new Date(fileTime.toMillis())); + } + + /** + * + * @return the basePath or null if no base path was set + */ + public Path getBasePath() { + return basePath; + } + + /** + * Default behavior si that clients will send in relative paths, this + * must be set to allow this fetcher to fetch the + * full path. + * + * @param basePath + */ + @Field + public void setBasePath(String basePath) { + this.basePath = Paths.get(basePath); + } + + /** + * Extract file system metadata (created, modified, accessed) when fetching file. + * The default is false. + * + * @param extractFileSystemMetadata + */ + @Field + public void setExtractFileSystemMetadata(boolean extractFileSystemMetadata) { + this.extractFileSystemMetadata = extractFileSystemMetadata; + } + + @Override + public void initialize(Map params) throws TikaConfigException { + //no-op + } + + @Override + public void checkInitialization(InitializableProblemHandler problemHandler) + throws TikaConfigException { + if (basePath == null || basePath.toString().trim().length() == 0) { + LOG.warn("'basePath' has not been set. " + + "This means that client code or clients can read from any file that this " + + "process has permissions to read. If you are running tika-server, make " + + "absolutely certain that you've locked down " + + "access to tika-server and file-permissions for the tika-server process."); + return; + } + if (basePath.toString().startsWith("http://")) { + throw new TikaConfigException("FileSystemFetcher only works with local file systems. " + + " Please use the tika-fetcher-http module for http calls"); + } else if (basePath.toString().startsWith("ftp://")) { + throw new TikaConfigException("FileSystemFetcher only works with local file systems. 
" + + " Please consider contributing an ftp fetcher module"); + } else if (basePath.toString().startsWith("s3://")) { + throw new TikaConfigException("FileSystemFetcher only works with local file systems. " + + " Please use the tika-fetcher-s3 module"); + } + + if (basePath.toAbsolutePath().toString().contains("\u0000")) { + throw new TikaConfigException( + "base path must not contain \u0000. " + "Seriously, what were you thinking?"); + } + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java new file mode 100644 index 0000000000..931aa10892 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.fetcher.fs; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FileSystemFetcherPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(FileSystemFetcherPlugin.class); + @Override + public void start() { + LOG.info("Starting"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting"); + super.delete(); + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java new file mode 100644 index 0000000000..b9f155fbd7 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.fetcher.fs.config; + +import org.apache.tika.pipes.fetcher.config.AbstractConfig; + +public class FileSystemFetcherConfig extends AbstractConfig { + private String basePath; + private boolean extractFileSystemMetadata; + + public String getBasePath() { + return basePath; + } + + public FileSystemFetcherConfig setBasePath(String basePath) { + this.basePath = basePath; + return this; + } + + public boolean isExtractFileSystemMetadata() { + return extractFileSystemMetadata; + } + + public FileSystemFetcherConfig setExtractFileSystemMetadata(boolean extractFileSystemMetadata) { + this.extractFileSystemMetadata = extractFileSystemMetadata; + return this; + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/resources/plugin.properties new file mode 100644 index 0000000000..41b443a518 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +plugin.id=fs-fetcher +plugin.class=org.apache.tika.pipes.fetcher.fs.FileSystemFetcherPlugin +plugin.version=3.0.0-SNAPSHOT +plugin.provider=Local File System Fetcher +plugin.description=Capable of fetching from the local file system diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml index e3f5044d40..f975d145e8 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml @@ -29,12 +29,6 @@ Apache Tika Google Cloud Storage fetcher - - ${project.groupId} - tika-core - ${project.version} - provided - com.google.cloud google-cloud-storage @@ -110,4 +104,4 @@ 3.0.0-BETA2-rc1 - \ No newline at end of file + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcherPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcherPlugin.java new file mode 100644 index 0000000000..c90ebb140b --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcherPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.tika.pipes.fetcher.gcs; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class GCSFetcherPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(GCSFetcherPlugin.class); + @Override + public void start() { + LOG.info("Starting"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting"); + super.delete(); + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/resources/plugin.properties new file mode 100644 index 0000000000..79e5590e87 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +plugin.id=gcs-fetcher +plugin.class=org.apache.tika.pipes.fetcher.gcs.GCSFetcherPlugin +plugin.version=3.0.0-SNAPSHOT +plugin.provider=GCS Fetcher +plugin.description=GCS Fetcher diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml index 320569ed53..4d0761dd50 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml @@ -29,17 +29,6 @@ Apache Tika http fetcher - - org.apache.logging.log4j - log4j-slf4j2-impl - provided - - - ${project.groupId} - tika-core - ${project.version} - provided - ${project.groupId} tika-httpclient-commons @@ -61,18 +50,6 @@ com.fasterxml.jackson.core jackson-annotations - - ${project.groupId} - tika-core - ${project.version} - test-jar - test - - - org.mockito - mockito-core - test - diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcherPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcherPlugin.java new file mode 100644 index 0000000000..bd77c10268 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcherPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.http; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HttpFetcherPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(HttpFetcherPlugin.class); + @Override + public void start() { + LOG.info("Starting"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting"); + super.delete(); + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/resources/plugin.properties new file mode 100644 index 0000000000..ecd3cb5123 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +plugin.id=http-fetcher +plugin.class=org.apache.tika.pipes.fetcher.http.HttpFetcherPlugin +plugin.version=3.0.0-SNAPSHOT +plugin.provider=HTTP Fetcher +plugin.description=HTTP web request fetcher diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml index ecee31f26d..66951cf2c3 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml @@ -56,11 +56,6 @@ - - ${project.groupId} - tika-core - ${project.version} - com.microsoft.graph microsoft-graph @@ -80,16 +75,6 @@ - - org.junit.jupiter - junit-jupiter-engine - test - - - org.mockito - mockito-core - test - org.mockito mockito-junit-jupiter @@ -101,7 +86,6 @@ nimbus-jose-jwt - @@ -159,6 +143,10 @@ META-INF/DEPENDENCIES target/classes/META-INF/DEPENDENCIES + + META-INF/extensions.idx + target/classes/META-INF/extensions.idx + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java index 3c27795d3a..cb74b77f00 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphFetcher.java @@ -24,6 +24,7 @@ import com.azure.identity.ClientCertificateCredentialBuilder; import com.azure.identity.ClientSecretCredentialBuilder; import com.microsoft.graph.serviceclient.GraphServiceClient; +import org.pf4j.Extension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +45,7 @@ * Fetches files from Microsoft Graph API. 
* Fetch keys are ${siteDriveId},${driveItemId} */ +@Extension public class MicrosoftGraphFetcher extends AbstractFetcher implements Initializable { private static final Logger LOGGER = LoggerFactory.getLogger(MicrosoftGraphFetcher.class); private GraphServiceClient graphClient; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphPlugin.java new file mode 100644 index 0000000000..541ba0f933 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/MicrosoftGraphPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.fetchers.microsoftgraph; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MicrosoftGraphPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(MicrosoftGraphPlugin.class); + @Override + public void start() { + LOG.info("Starting"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting"); + super.delete(); + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/resources/plugin.properties new file mode 100644 index 0000000000..6d7e508e14 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +plugin.id=microsoft-graph-fetcher +plugin.class=org.apache.tika.pipes.fetchers.microsoftgraph.MicrosoftGraphPlugin +plugin.version=3.0.0-SNAPSHOT +plugin.provider=Microsoft Graph Fetcher +plugin.description=Uses the Microsoft Graph API to fetch data diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml index 8c06d00993..2189451e19 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml @@ -34,15 +34,8 @@ aws-java-sdk-s3 - org.apache.logging.log4j - log4j-slf4j2-impl - provided - - - ${project.groupId} - tika-core - ${project.version} - provided + commons-io + commons-io @@ -115,4 +108,4 @@ 3.0.0-BETA2-rc1 - \ No newline at end of file + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3FetcherPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3FetcherPlugin.java new file mode 100644 index 0000000000..97676ca7d8 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3FetcherPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.s3; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class S3FetcherPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(S3FetcherPlugin.class); + @Override + public void start() { + LOG.info("Starting"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting"); + super.delete(); + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/resources/plugin.properties new file mode 100644 index 0000000000..31bc1c52c5 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +plugin.id=s3-fetcher +plugin.class=org.apache.tika.pipes.fetcher.s3.S3FetcherPlugin +plugin.version=3.0.0-SNAPSHOT +plugin.provider=S3 Fetcher +plugin.description=Capable of using amazon s3 sdk and fetching content. 
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml new file mode 100644 index 0000000000..860f815fc9 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml @@ -0,0 +1,101 @@ + + + + + tika-fetchers + org.apache.tika + 3.0.0-SNAPSHOT + + 4.0.0 + + tika-fetcher-url + Apache Tika URL Fetcher + Apache Tika Pipes Fetcher for HTTP URLs + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.tika.pipes.fetcher.s3 + + + + + + + test-jar + + + + + + maven-shade-plugin + ${maven.shade.version} + + + package + + shade + + + + false + + + + + *:* + + META-INF/* + LICENSE.txt + NOTICE.txt + + + + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + META-INF/NOTICE + target/classes/META-INF/NOTICE + + + META-INF/DEPENDENCIES + target/classes/META-INF/DEPENDENCIES + + + + + + + + + + + + 3.0.0-BETA2-rc1 + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java new file mode 100644 index 0000000000..7692516cd0 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.url; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.Locale; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.pipes.fetcher.AbstractFetcher; + +/** + * Simple fetcher for URLs. This simply calls {@link TikaInputStream#get(URL)}. + * This intentionally does not support fetching for files. + * Please use the FileSystemFetcher for that. If you need more advanced control (passwords, + * timeouts, proxies, etc), please use the tika-fetcher-http module. + */ +public class UrlFetcher extends AbstractFetcher { + + @Override + public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws IOException, TikaException { + if (fetchKey.contains("\u0000")) { + throw new IllegalArgumentException("URL must not contain \u0000. 
" + + "Please review the life decisions that led you to requesting " + + "a URL with this character in it."); + } + if (fetchKey.toLowerCase(Locale.US).trim().startsWith("file:")) { + throw new IllegalArgumentException( + "The UrlFetcher does not fetch from file shares; " + + "please use the FileSystemFetcher"); + } + return TikaInputStream.get(new URL(fetchKey), metadata); + } + +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java new file mode 100644 index 0000000000..0750b780bb --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.url.config; + +import org.apache.tika.pipes.fetcher.config.AbstractConfig; + +public class UrlFetcherConfig extends AbstractConfig { + // no fetcher config needed at this time. 
+} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/resources/plugin.properties new file mode 100644 index 0000000000..cc36bf1f5f --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +plugin.id=url-fetcher +plugin.class=org.apache.tika.pipes.fetcher.url.UrlFetcherPlugin +plugin.version=3.0.0-SNAPSHOT +plugin.provider=URL Fetcher +plugin.description=Capable of fetching URLs diff --git a/tika-server/tika-server-core/pom.xml b/tika-server/tika-server-core/pom.xml index b7120731d3..a0d821a844 100644 --- a/tika-server/tika-server-core/pom.xml +++ b/tika-server/tika-server-core/pom.xml @@ -130,6 +130,11 @@ org.apache.logging.log4j log4j-slf4j2-impl + + org.pf4j + pf4j + provided + @@ -335,4 +340,4 @@ 3.0.0-BETA2-rc1 - \ No newline at end of file + From 558c11966f3f386c3bcfcabd9ef9372bed106086 Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Sun, 18 Aug 2024 10:36:44 -0500 Subject: [PATCH 02/11] TIKA-4272: use the maven dependency plugin and maven assembly plugin --- pom.xml | 1 + tika-grpc/pom.xml | 98 +++++++++--------------- tika-grpc/src/assembly/grpc-assembly.xml | 23 ++++++ 3 files changed, 60 insertions(+), 62 deletions(-) create mode 100644 tika-grpc/src/assembly/grpc-assembly.xml diff --git a/pom.xml b/pom.xml index 552f4ceb5a..7a488f2d99 100644 --- a/pom.xml +++ b/pom.xml @@ -114,6 +114,7 @@ + diff --git a/tika-grpc/pom.xml b/tika-grpc/pom.xml index 48e0a39207..659995a871 100644 --- a/tika-grpc/pom.xml +++ b/tika-grpc/pom.xml @@ -341,68 +341,6 @@ org.apache.tika.pipes.grpc.TikaGrpcServer - - maven-shade-plugin - ${maven.shade.version} - - - package - - shade - - - - false - - - - - - - - *:* - - module-info.class - META-INF/maven/plugin.xml - META-INF/versions/9/module-info.class - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - META-INF/*.txt - META-INF/ASL2.0 - META-INF/DEPENDENCIES - META-INF/LICENSE - META-INF/NOTICE - META-INF/README - META-INF/MANIFEST.MF - LICENSE.txt - NOTICE.txt - CHANGES - README - builddef.lst - - - - - - org.apache.tika.pipes.grpc.TikaGrpcServer - - true - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - - - - org.apache.maven.plugins 
maven-checkstyle-plugin @@ -435,6 +373,42 @@ + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.1 + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/dependencies + + + + + + maven-assembly-plugin + 3.7.0 + + + src/assembly/grpc-assembly.xml + + false + + + + make-assembly + package + + single + + + + diff --git a/tika-grpc/src/assembly/grpc-assembly.xml b/tika-grpc/src/assembly/grpc-assembly.xml new file mode 100644 index 0000000000..ee99848ed5 --- /dev/null +++ b/tika-grpc/src/assembly/grpc-assembly.xml @@ -0,0 +1,23 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/dependencies + / + + + ${project.build.directory} + / + + *.jar + + + + From 82de0863e8a0617661f391cbf2b4e661a328ec51 Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Sun, 18 Aug 2024 10:39:22 -0500 Subject: [PATCH 03/11] TIKA-4272: fix transformer removed from merge --- tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml index af920202a9..97850fbe26 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml @@ -176,6 +176,10 @@ META-INF/DEPENDENCIES target/classes/META-INF/DEPENDENCIES + + META-INF/extensions.idx + target/classes/META-INF/extensions.idx + From 562c05b6f9ec3d2ed8d009d14a9aa79bbec94ea8 Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Wed, 21 Aug 2024 07:32:25 -0500 Subject: [PATCH 04/11] TIKA-4272: completely refactor how fetchers work by integrating pf4j --- .../tika/pipes/fetcher/AbstractFetcher.java | 20 ++- .../tika/pipes/fetcher/EmptyFetcher.java | 6 +- .../apache/tika/pipes/fetcher/Fetcher.java | 2 +- .../tika/pipes/fetcher/FetcherManager.java | 8 +- ...AbstractConfig.java => FetcherConfig.java} | 28 ++++- 
.../fs/config/FileSystemFetcherConfig.java | 11 +- .../tika/pipes/grpc/ExpiringFetcherStore.java | 45 +++---- .../tika/pipes/grpc/TikaGrpcServerImpl.java | 118 +++++++++--------- .../grpc/exception/TikaGrpcException.java | 7 ++ tika-grpc/src/main/proto/tika.proto | 6 +- .../pipes/grpc/ExpiringFetcherStoreTest.java | 4 +- .../azblob/config/AZBlobFetcherConfig.java | 12 +- .../fs/config/FileSystemFetcherConfig.java | 11 +- .../fetcher/gcs/config/GCSFetcherConfig.java | 11 +- .../http/config/HttpFetcherConfig.java | 11 +- .../config/MicrosoftGraphFetcherConfig.java | 10 +- .../fetcher/s3/config/S3FetcherConfig.java | 11 +- .../fetcher/url/config/UrlFetcherConfig.java | 12 +- 18 files changed, 203 insertions(+), 130 deletions(-) rename tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/{AbstractConfig.java => FetcherConfig.java} (52%) create mode 100644 tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java index 0b417e3fb1..71a1cd394a 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java @@ -16,29 +16,23 @@ */ package org.apache.tika.pipes.fetcher; -import org.apache.tika.config.Field; - - public abstract class AbstractFetcher implements Fetcher { - private String name; + private String pluginId; public AbstractFetcher() { } - public AbstractFetcher(String name) { - this.name = name; + public AbstractFetcher(String pluginId) { + this.pluginId = pluginId; } - @Override - public String getName() { - return name; + public String getPluginId() { + return pluginId; } - @Field - public void setName(String name) { - this.name = name; + public void setPluginId(String pluginId) { + this.pluginId = pluginId; } - } diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java index d64f815244..60bb81d6a0 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java @@ -25,9 +25,11 @@ public class EmptyFetcher implements Fetcher { + public static final String PLUGIN_ID = "empty-fetcher"; + @Override - public String getName() { - return "empty"; + public String getPluginId() { + return PLUGIN_ID; } @Override diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java index c7e1b3d43d..dd8ee695a7 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java @@ -34,7 +34,7 @@ */ public interface Fetcher extends ExtensionPoint { - String getName(); + String getPluginId(); InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws TikaException, IOException; } diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java index 40121f9a7e..007b052f18 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java @@ -47,15 +47,15 @@ public static FetcherManager load(Path p) throws IOException, TikaConfigExceptio public FetcherManager(List fetchers) throws TikaConfigException { for (Fetcher fetcher : fetchers) { - String name = fetcher.getName(); + String name = fetcher.getPluginId(); if (name == null || name.trim().length() == 0) { throw new TikaConfigException("fetcher name must not be blank"); } - if (fetcherMap.containsKey(fetcher.getName())) { + if 
(fetcherMap.containsKey(fetcher.getPluginId())) { throw new TikaConfigException( - "Multiple fetchers cannot support the same prefix: " + fetcher.getName()); + "Multiple fetchers cannot support the same prefix: " + fetcher.getPluginId()); } - fetcherMap.put(fetcher.getName(), fetcher); + fetcherMap.put(fetcher.getPluginId(), fetcher); } } diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java similarity index 52% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java rename to tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java index a1c7e48734..241211f36e 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/AbstractConfig.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java @@ -16,6 +16,30 @@ */ package org.apache.tika.pipes.fetcher.config; -public abstract class AbstractConfig { - // Nothing to do here yet. 
+import java.io.IOException; +import java.util.Properties; + +public abstract class FetcherConfig { + private String fetcherId; + + abstract public String getPluginId(); + + public String getFetcherId() { + return fetcherId; + } + + public FetcherConfig setFetcherId(String fetcherId) { + this.fetcherId = fetcherId; + return this; + } + + public static String getPluginIdForFetcherConfig(Class clazz) { + Properties properties = new Properties(); + try { + properties.load(clazz.getResourceAsStream("/plugin.properties")); + return properties.getProperty("plugin.id"); + } catch (IOException e) { + throw new IllegalStateException("Cannot find plugin.properties for plugin", e); + } + } } diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java index b9f155fbd7..52ec0ecaab 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java @@ -16,9 +16,16 @@ */ package org.apache.tika.pipes.fetcher.fs.config; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; -public class FileSystemFetcherConfig extends AbstractConfig { +public class FileSystemFetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(FileSystemFetcherConfig.class); + + @Override + public String getPluginId() { + return PLUGIN_ID; + } private String basePath; private boolean extractFileSystemMetadata; diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java index d21f11b08f..35654a8643 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java +++ 
b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java @@ -29,14 +29,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.tika.pipes.fetcher.AbstractFetcher; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; public class ExpiringFetcherStore implements AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger(ExpiringFetcherStore.class); public static final long EXPIRE_JOB_INITIAL_DELAY = 1L; - private final Map fetchers = Collections.synchronizedMap(new HashMap<>()); - private final Map fetcherConfigs = Collections.synchronizedMap(new HashMap<>()); + private final Map fetcherConfigs = Collections.synchronizedMap(new HashMap<>()); private final Map fetcherLastAccessed = Collections.synchronizedMap(new HashMap<>()); private final ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor(); @@ -44,18 +42,18 @@ public class ExpiringFetcherStore implements AutoCloseable { public ExpiringFetcherStore(int expireAfterSeconds, int checkForExpiredFetchersDelaySeconds) { executorService.scheduleAtFixedRate(() -> { Set expired = new HashSet<>(); - for (String fetcherName : fetchers.keySet()) { - Instant lastAccessed = fetcherLastAccessed.get(fetcherName); + for (String fetcherId : fetcherConfigs.keySet()) { + Instant lastAccessed = fetcherLastAccessed.get(fetcherId); if (lastAccessed == null) { - LOG.error("Detected a fetcher with no last access time. FetcherName={}", fetcherName); - expired.add(fetcherName); + LOG.error("Detected a fetcher with no last access time. fetcherId={}", fetcherId); + expired.add(fetcherId); } else if (Instant .now() .isAfter(lastAccessed.plusSeconds(expireAfterSeconds))) { - LOG.info("Detected stale fetcher {} hasn't been accessed in {} seconds. " + "Deleting.", fetcherName, Instant + LOG.info("Detected stale fetcher {} hasn't been accessed in {} seconds. 
" + "Deleting.", fetcherId, Instant .now() .getEpochSecond() - lastAccessed.getEpochSecond()); - expired.add(fetcherName); + expired.add(fetcherId); } } for (String expiredFetcherId : expired) { @@ -64,18 +62,13 @@ public ExpiringFetcherStore(int expireAfterSeconds, int checkForExpiredFetchersD }, EXPIRE_JOB_INITIAL_DELAY, checkForExpiredFetchersDelaySeconds, TimeUnit.SECONDS); } - public boolean deleteFetcher(String fetcherName) { - boolean success = fetchers.remove(fetcherName) != null; - fetcherConfigs.remove(fetcherName); - fetcherLastAccessed.remove(fetcherName); + public boolean deleteFetcher(String fetcherId) { + boolean success = fetcherConfigs.remove(fetcherId) != null; + fetcherLastAccessed.remove(fetcherId); return success; } - public Map getFetchers() { - return fetchers; - } - - public Map getFetcherConfigs() { + public Map getFetcherConfigs() { return fetcherConfigs; } @@ -83,15 +76,15 @@ public Map getFetcherConfigs() { * This method will get the fetcher, but will also log the access the fetcher as having * been accessed. This prevents the scheduled job from removing the stale fetcher. 
*/ - public T getFetcherAndLogAccess(String fetcherName) { - fetcherLastAccessed.put(fetcherName, Instant.now()); - return (T) fetchers.get(fetcherName); + public C getFetcherAndLogAccess(String fetcherId) { + fetcherLastAccessed.put(fetcherId, Instant.now()); + return (C) fetcherConfigs.get(fetcherId); } - public void createFetcher(T fetcher, C config) { - fetchers.put(fetcher.getName(), fetcher); - fetcherConfigs.put(fetcher.getName(), config); - getFetcherAndLogAccess(fetcher.getName()); + public void createFetcher(String fetcherId, C config) { + config.setFetcherId(fetcherId); + fetcherConfigs.put(fetcherId, config); + getFetcherAndLogAccess(fetcherId); } @Override diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java index d65178e8cf..1f4a81fa8d 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java @@ -19,7 +19,6 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; -import java.lang.reflect.InvocationTargetException; import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.LinkedHashMap; @@ -77,11 +76,11 @@ import org.apache.tika.pipes.PipesConfig; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.emitter.EmitKey; -import org.apache.tika.pipes.fetcher.AbstractFetcher; import org.apache.tika.pipes.fetcher.FetchKey; import org.apache.tika.pipes.fetcher.Fetcher; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; import org.apache.tika.pipes.fetcher.config.FetcherConfigContainer; +import org.apache.tika.pipes.grpc.exception.TikaGrpcException; class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { private static final Logger LOG = LoggerFactory.getLogger(TikaGrpcServerImpl.class); @@ -124,10 +123,6 
@@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { updateTikaConfig(); this.pluginManager = pluginManager; - List fetchers = pluginManager.getExtensions(Fetcher.class); - for (Fetcher fetcher : fetchers) { - - } } private void updateTikaConfig() @@ -143,17 +138,17 @@ private void updateTikaConfig() for (int i = 0; i < fetchersElement.getChildNodes().getLength(); ++i) { fetchersElement.removeChild(fetchersElement.getChildNodes().item(i)); } - for (var fetcherEntry : expiringFetcherStore.getFetchers().entrySet()) { - AbstractFetcher fetcherObject = fetcherEntry.getValue(); + for (var fetcherConfigEntry : expiringFetcherStore.getFetcherConfigs().entrySet()) { + Fetcher fetcherObject = getFetcher(fetcherConfigEntry.getValue().getPluginId()); Map fetcherConfigParams = OBJECT_MAPPER.convertValue( - expiringFetcherStore.getFetcherConfigs().get(fetcherEntry.getKey()), + expiringFetcherStore.getFetcherConfigs().get(fetcherConfigEntry.getKey()), new TypeReference<>() { }); Element fetcher = tikaConfigDoc.createElement("fetcher"); - fetcher.setAttribute("class", fetcherEntry.getValue().getClass().getName()); - Element fetcherName = tikaConfigDoc.createElement("name"); - fetcherName.setTextContent(fetcherObject.getName()); - fetcher.appendChild(fetcherName); + fetcher.setAttribute("class", fetcherObject.getClass().getName()); + Element pluginIdElm = tikaConfigDoc.createElement("pluginId"); + pluginIdElm.setTextContent(fetcherObject.getPluginId()); + fetcher.appendChild(pluginIdElm); populateFetcherConfigs(fetcherConfigParams, tikaConfigDoc, fetcher); fetchersElement.appendChild(fetcher); } @@ -222,9 +217,9 @@ public void fetchAndParse(FetchAndParseRequest request, private void fetchAndParseImpl(FetchAndParseRequest request, StreamObserver responseObserver) { - AbstractFetcher fetcher = + FetcherConfig fetcherConfig = expiringFetcherStore.getFetcherAndLogAccess(request.getFetcherId()); - if (fetcher == null) { + if (fetcherConfig == null) { throw new RuntimeException( 
"Could not find fetcher with name " + request.getFetcherId()); } @@ -234,16 +229,16 @@ private void fetchAndParseImpl(FetchAndParseRequest request, String additionalFetchConfigJson = request.getAdditionalFetchConfigJson(); if (StringUtils.isNotBlank(additionalFetchConfigJson)) { // The fetch and parse has the option to specify additional configuration - AbstractConfig abstractConfig = expiringFetcherStore + FetcherConfig abstractFetcherConfig = expiringFetcherStore .getFetcherConfigs() - .get(fetcher.getName()); + .get(request.getFetcherId()); parseContext.set(FetcherConfigContainer.class, new FetcherConfigContainer() - .setConfigClassName(abstractConfig + .setConfigClassName(abstractFetcherConfig .getClass().getName()) .setJson(additionalFetchConfigJson)); } PipesResult pipesResult = pipesClient.process(new FetchEmitTuple(request.getFetchKey(), - new FetchKey(fetcher.getName(), request.getFetchKey()), new EmitKey(), tikaMetadata, parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP)); + new FetchKey(request.getFetcherId(), request.getFetchKey()), new EmitKey(), tikaMetadata, parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP)); FetchAndParseReply.Builder fetchReplyBuilder = FetchAndParseReply.newBuilder() .setFetchKey(request.getFetchKey()) @@ -278,7 +273,7 @@ public void saveFetcher(SaveFetcherRequest request, try { Map fetcherConfigMap = OBJECT_MAPPER.readValue(request.getFetcherConfigJson(), new TypeReference<>() {}); Map tikaParamsMap = createTikaParamMap(fetcherConfigMap); - saveFetcher(request.getFetcherId(), request.getFetcherClass(), fetcherConfigMap, tikaParamsMap); + saveFetcher(request.getFetcherId(), request.getPluginId(), fetcherConfigMap, tikaParamsMap); updateTikaConfig(); } catch (Exception e) { throw new RuntimeException(e); @@ -287,35 +282,32 @@ public void saveFetcher(SaveFetcherRequest request, responseObserver.onCompleted(); } - private void saveFetcher(String name, String fetcherClassName, Map paramsMap, Map tikaParamsMap) { + private void 
saveFetcher(String fetcherId, String pluginId, Map paramsMap, Map tikaParamsMap) { try { if (paramsMap == null) { paramsMap = new LinkedHashMap<>(); } - Class fetcherClass = - (Class) Class.forName(fetcherClassName); + Fetcher fetcher = getFetcher(pluginId); + Class fetcherClass = fetcher.getClass(); String configClassName = fetcherClass.getPackageName() + ".config." + fetcherClass.getSimpleName() + "Config"; - Class configClass = - (Class) Class.forName(configClassName); - AbstractConfig configObject = OBJECT_MAPPER.convertValue(paramsMap, configClass); - AbstractFetcher abstractFetcher = - fetcherClass.getDeclaredConstructor(configClass).newInstance(configObject); - abstractFetcher.setName(name); + + Class configClass = + (Class) Class.forName(configClassName, true, fetcher.getClass().getClassLoader()); + FetcherConfig configObject = OBJECT_MAPPER.convertValue(paramsMap, configClass); if (Initializable.class.isAssignableFrom(fetcherClass)) { - Initializable initializable = (Initializable) abstractFetcher; + Initializable initializable = (Initializable) fetcher; initializable.initialize(tikaParamsMap); } - if (expiringFetcherStore.deleteFetcher(name)) { - LOG.info("Updating fetcher {}", name); + if (expiringFetcherStore.deleteFetcher(fetcherId)) { + LOG.info("Updating fetcher {}", fetcherId); } else { - LOG.info("Creating new fetcher {}", name); + LOG.info("Creating new fetcher {}", fetcherId); } - expiringFetcherStore.createFetcher(abstractFetcher, configObject); - } catch (ClassNotFoundException | InstantiationException | IllegalAccessException | - InvocationTargetException | NoSuchMethodException | TikaConfigException e) { - throw new RuntimeException(e); + expiringFetcherStore.createFetcher(fetcherId, configObject); + } catch (ClassNotFoundException | TikaConfigException e) { + throw new TikaGrpcException("Could not create fetcher", e); } } @@ -340,16 +332,15 @@ static Status notFoundStatus(String fetcherId) { public void getFetcher(GetFetcherRequest request, 
StreamObserver responseObserver) { GetFetcherReply.Builder getFetcherReply = GetFetcherReply.newBuilder(); - AbstractConfig abstractConfig = + FetcherConfig fetcherConfig = expiringFetcherStore.getFetcherConfigs().get(request.getFetcherId()); - AbstractFetcher abstractFetcher = expiringFetcherStore.getFetchers().get(request.getFetcherId()); - if (abstractFetcher == null || abstractConfig == null) { + if (fetcherConfig == null) { responseObserver.onError(StatusProto.toStatusException(notFoundStatus(request.getFetcherId()))); return; } getFetcherReply.setFetcherId(request.getFetcherId()); - getFetcherReply.setFetcherClass(abstractFetcher.getClass().getName()); - Map paramMap = OBJECT_MAPPER.convertValue(abstractConfig, new TypeReference<>() {}); + getFetcherReply.setPluginId(fetcherConfig.getPluginId()); + Map paramMap = OBJECT_MAPPER.convertValue(fetcherConfig, new TypeReference<>() {}); paramMap.forEach( (k, v) -> getFetcherReply.putParams(Objects.toString(k), Objects.toString(v))); responseObserver.onNext(getFetcherReply.build()); @@ -360,8 +351,8 @@ public void getFetcher(GetFetcherRequest request, public void listFetchers(ListFetchersRequest request, StreamObserver responseObserver) { ListFetchersReply.Builder listFetchersReplyBuilder = ListFetchersReply.newBuilder(); - for (Map.Entry fetcherConfig : expiringFetcherStore.getFetcherConfigs() - .entrySet()) { + for (Map.Entry fetcherConfig : expiringFetcherStore.getFetcherConfigs() + .entrySet()) { GetFetcherReply.Builder replyBuilder = saveFetcherReply(fetcherConfig); listFetchersReplyBuilder.addGetFetcherReplies(replyBuilder.build()); } @@ -370,22 +361,19 @@ public void listFetchers(ListFetchersRequest request, } private GetFetcherReply.Builder saveFetcherReply( - Map.Entry fetcherConfig) { - AbstractFetcher abstractFetcher = - expiringFetcherStore.getFetchers().get(fetcherConfig.getKey()); - AbstractConfig abstractConfig = - expiringFetcherStore.getFetcherConfigs().get(fetcherConfig.getKey()); + Map.Entry 
fetcherConfigEntry) { + FetcherConfig fetcherConfig = fetcherConfigEntry.getValue(); GetFetcherReply.Builder replyBuilder = - GetFetcherReply.newBuilder().setFetcherClass(abstractFetcher.getClass().getName()) - .setFetcherId(abstractFetcher.getName()); - loadParamsIntoReply(abstractConfig, replyBuilder); + GetFetcherReply.newBuilder().setPluginId(fetcherConfig.getPluginId()) + .setFetcherId(fetcherConfig.getFetcherId()); + loadParamsIntoReply(fetcherConfig, replyBuilder); return replyBuilder; } - private static void loadParamsIntoReply(AbstractConfig abstractConfig, + private static void loadParamsIntoReply(FetcherConfig fetcherConfig, GetFetcherReply.Builder replyBuilder) { Map paramMap = - OBJECT_MAPPER.convertValue(abstractConfig, new TypeReference<>() { + OBJECT_MAPPER.convertValue(fetcherConfig, new TypeReference<>() { }); if (paramMap != null) { paramMap.forEach( @@ -412,19 +400,29 @@ public void deleteFetcher(DeleteFetcherRequest request, public void getFetcherConfigJsonSchema(GetFetcherConfigJsonSchemaRequest request, StreamObserver responseObserver) { GetFetcherConfigJsonSchemaReply.Builder builder = GetFetcherConfigJsonSchemaReply.newBuilder(); try { - JsonSchema jsonSchema = JSON_SCHEMA_GENERATOR.generateSchema(Class.forName(request.getFetcherClass())); + Fetcher fetcher = getFetcher(request.getPluginId()); + JsonSchema jsonSchema = JSON_SCHEMA_GENERATOR.generateSchema(fetcher.getClass()); builder.setFetcherConfigJsonSchema(OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema)); - } catch (ClassNotFoundException | JsonProcessingException e) { - throw new RuntimeException("Could not create json schema for " + request.getFetcherClass(), e); + } catch (JsonProcessingException e) { + throw new RuntimeException("Could not create json schema for fetcher with plugin ID " + request.getPluginId(), e); } responseObserver.onNext(builder.build()); responseObserver.onCompleted(); } + private Fetcher getFetcher(String pluginId) { + return 
pluginManager.getExtensions(Fetcher.class, pluginId) + .stream() + .findFirst() + .orElseThrow(); + } + @Override public void listFetcherPlugins(ListFetcherPluginsRequest request, StreamObserver responseObserver) { - for (Fetcher extension : pluginManager.getExtensions(Fetcher.class)) { - responseObserver.onNext(ListFetcherPluginsReply.newBuilder().setFetcherPluginId(extension.getName()).build()); + for (Fetcher fetcher : pluginManager.getExtensions(Fetcher.class)) { + responseObserver.onNext(ListFetcherPluginsReply.newBuilder() + .setFetcherPluginId(fetcher.getPluginId()) + .build()); } } diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java new file mode 100644 index 0000000000..383eedb32e --- /dev/null +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java @@ -0,0 +1,7 @@ +package org.apache.tika.pipes.grpc.exception; + +public class TikaGrpcException extends RuntimeException { + public TikaGrpcException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/tika-grpc/src/main/proto/tika.proto b/tika-grpc/src/main/proto/tika.proto index 8019ca9195..671bdd04c3 100644 --- a/tika-grpc/src/main/proto/tika.proto +++ b/tika-grpc/src/main/proto/tika.proto @@ -71,7 +71,7 @@ message SaveFetcherRequest { string fetcher_id = 1; // The full java class name of the fetcher class. List of // fetcher classes is found here: https://cwiki.apache.org/confluence/display/TIKA/tika-pipes - string fetcher_class = 2; + string plugin_id = 2; // JSON string of the fetcher config object. To see the json schema from which to build this json, // use the GetFetcherConfigJsonSchema rpc method. string fetcher_config_json = 3; @@ -122,7 +122,7 @@ message GetFetcherReply { // Echoes the ID of the fetcher being returned. string fetcher_id = 1; // The full Java class name of the Fetcher. 
- string fetcher_class = 2; + string plugin_id = 2; // The configuration parameters. map params = 3; } @@ -141,7 +141,7 @@ message ListFetchersReply { message GetFetcherConfigJsonSchemaRequest { // The full java class name of the fetcher config for which to fetch json schema. - string fetcher_class = 1; + string plugin_id = 1; } message GetFetcherConfigJsonSchemaReply { diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java index 264c366f38..09ce85f982 100644 --- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java +++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java @@ -28,7 +28,7 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.fetcher.AbstractFetcher; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; class ExpiringFetcherStoreTest { @@ -42,7 +42,7 @@ public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseC } }; fetcher.setName("nick"); - AbstractConfig config = new AbstractConfig() { + FetcherConfig config = new FetcherConfig() { }; expiringFetcherStore.createFetcher(fetcher, config); diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java index 2bfe61fa79..dbc03bb451 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java @@ -16,9 +16,17 @@ */ package 
org.apache.tika.pipes.fetcher.azblob.config; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; + +public class AZBlobFetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(AZBlobFetcherConfig.class); + + @Override + public String getPluginId() { + return PLUGIN_ID; + } -public class AZBlobFetcherConfig extends AbstractConfig { private boolean spoolToTemp; private String sasToken; private String endpoint; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java index b9f155fbd7..52ec0ecaab 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java @@ -16,9 +16,16 @@ */ package org.apache.tika.pipes.fetcher.fs.config; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; -public class FileSystemFetcherConfig extends AbstractConfig { +public class FileSystemFetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(FileSystemFetcherConfig.class); + + @Override + public String getPluginId() { + return PLUGIN_ID; + } private String basePath; private boolean extractFileSystemMetadata; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java index a8dad6417d..49b94572cb 100644 --- 
a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java @@ -16,9 +16,16 @@ */ package org.apache.tika.pipes.fetcher.gcs.config; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; -public class GCSFetcherConfig extends AbstractConfig { +public class GCSFetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(GCSFetcherConfig.class); + + @Override + public String getPluginId() { + return PLUGIN_ID; + } private boolean spoolToTemp; private String projectId; private String bucket; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java index 5274a65f9b..76fdc6ace5 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java @@ -19,9 +19,16 @@ import java.util.ArrayList; import java.util.List; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; -public class HttpFetcherConfig extends AbstractConfig { +public class HttpFetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(HttpFetcherConfig.class); + + @Override + public String getPluginId() { + return PLUGIN_ID; + } private String userName; private String password; private String ntDomain; diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java index 495f83ba6b..478790c682 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java @@ -19,9 +19,15 @@ import java.util.ArrayList; import java.util.List; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; -public class MicrosoftGraphFetcherConfig extends AbstractConfig { +public class MicrosoftGraphFetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(MicrosoftGraphFetcherConfig.class); + @Override + public String getPluginId() { + return PLUGIN_ID; + } private long[] throttleSeconds; private boolean spoolToTemp; private ClientSecretCredentialsConfig clientSecretCredentialsConfig; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java index 84a335a2bd..24bd37893f 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java @@ -16,9 +16,16 @@ */ package org.apache.tika.pipes.fetcher.s3.config; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import 
org.apache.tika.pipes.fetcher.config.FetcherConfig; -public class S3FetcherConfig extends AbstractConfig { +public class S3FetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(S3FetcherConfig.class); + + @Override + public String getPluginId() { + return PLUGIN_ID; + } private boolean spoolToTemp; private String region; private String profile; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java index 0750b780bb..c5d5531379 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java @@ -16,8 +16,14 @@ */ package org.apache.tika.pipes.fetcher.url.config; -import org.apache.tika.pipes.fetcher.config.AbstractConfig; +import org.apache.tika.pipes.fetcher.config.FetcherConfig; -public class UrlFetcherConfig extends AbstractConfig { - // no fetcher config needed at this time. 
+public class UrlFetcherConfig extends FetcherConfig { + + public static final String PLUGIN_ID = getPluginIdForFetcherConfig(UrlFetcherConfig.class); + + @Override + public String getPluginId() { + return PLUGIN_ID; + } } From 0814080c1db895ad2b90ea58ff37ee797d9ea657 Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Wed, 21 Aug 2024 09:27:38 -0500 Subject: [PATCH 05/11] TIKA-4272: more pf4j work --- .../apache/tika/pipes/async/MockFetcher.java | 5 +-- .../fetcher/fs/FileSystemFetcherTest.java | 1 - .../tika/pipes/grpc/TikaGrpcServer.java | 4 +- .../ClasspathPluginPropertiesFinder.java | 18 ++++++++ .../pipes/grpc/plugin/GrpcPluginManager.java | 31 ++++++++++++++ .../pipes/fetcher/azblob/AZBlobFetcher.java | 2 + .../pipes/fetcher/fs/FileSystemFetcher.java | 2 + .../tika/pipes/fetcher/gcs/GCSFetcher.java | 2 + .../tika/pipes/fetcher/http/HttpFetcher.java | 2 + .../tika-fetcher-microsoft-graph/pom.xml | 24 ++++++++++- .../tika/pipes/fetcher/s3/S3Fetcher.java | 2 + .../tika/pipes/fetcher/url/UrlFetcher.java | 3 ++ .../pipes/fetcher/url/UrlFetcherPlugin.java | 42 +++++++++++++++++++ 13 files changed, 131 insertions(+), 7 deletions(-) create mode 100644 tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java create mode 100644 tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcherPlugin.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java b/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java index acb533ece4..ef825e81e0 100644 --- a/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java +++ b/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java @@ -27,14 +27,13 @@ import org.apache.tika.pipes.fetcher.Fetcher; public class MockFetcher implements Fetcher { - private static final byte[] BYTES = ("" + "" + 
"Nikolai Lobachevsky" + "main_content" + "").getBytes(StandardCharsets.UTF_8); @Override - public String getName() { - return "mock"; + public String getPluginId() { + return "mock-fetcher"; } @Override diff --git a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java index 7e29ac20ad..4d7e7068dd 100644 --- a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java +++ b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java @@ -50,7 +50,6 @@ public void testNullByte() throws Exception { FileSystemFetcher f = new FileSystemFetcher(); assertThrows(InvalidPathException.class, () -> { f.setBasePath("bad\u0000path"); - f.setName("fs"); f.checkInitialization(InitializableProblemHandler.IGNORE); }); } diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java index 2ef816d3a1..05a1efeae4 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java @@ -34,7 +34,6 @@ import io.grpc.TlsServerCredentials; import io.grpc.protobuf.services.HealthStatusManager; import io.grpc.protobuf.services.ProtoReflectionService; -import org.pf4j.DefaultPluginManager; import org.pf4j.PluginManager; import org.pf4j.PluginWrapper; import org.slf4j.Logger; @@ -43,6 +42,7 @@ import org.apache.tika.config.TikaConfig; import org.apache.tika.config.TikaConfigSerializer; import org.apache.tika.pipes.fetcher.Fetcher; +import org.apache.tika.pipes.grpc.plugin.GrpcPluginManager; /** * Server that manages startup/shutdown of the GRPC Tika server. 
@@ -106,7 +106,7 @@ public void start() throws Exception { TikaConfigSerializer.serialize(new TikaConfig(), TikaConfigSerializer.Mode.STATIC_FULL, fw, StandardCharsets.UTF_8); } } - pluginManager = pluginDirs == null ? new DefaultPluginManager() : new DefaultPluginManager(pluginDirs); + pluginManager = pluginDirs == null ? new GrpcPluginManager() : new GrpcPluginManager(pluginDirs); pluginManager.loadPlugins(); LOGGER.info("Loaded {} plugins", pluginManager.getPlugins().size()); pluginManager.startPlugins(); diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java new file mode 100644 index 0000000000..472c1c975c --- /dev/null +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java @@ -0,0 +1,18 @@ +package org.apache.tika.pipes.grpc.plugin; + +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.pf4j.PropertiesPluginDescriptorFinder; + +public class ClasspathPluginPropertiesFinder extends PropertiesPluginDescriptorFinder { + @Override + protected Path getPropertiesPath(Path pluginPath, String propertiesFileName) { + Path propertiesPath = super.getPropertiesPath(pluginPath, propertiesFileName); + if (!propertiesPath.toFile().exists()) { + // If in development mode, we can also pull the plugin.properties from $pluginDir/src/main/resources/plugin.properties + propertiesPath = Paths.get(propertiesPath.getParent().toAbsolutePath().toString(), "src", "main", "resources", "plugin.properties"); + } + return propertiesPath; + } +} diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java new file mode 100644 index 0000000000..21098dab5b --- /dev/null +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java @@ -0,0 +1,31 
@@ +package org.apache.tika.pipes.grpc.plugin; + +import java.nio.file.Path; +import java.util.List; + +import org.pf4j.DefaultPluginManager; +import org.pf4j.PluginDescriptorFinder; +import org.pf4j.PluginLoader; + +public class GrpcPluginManager extends DefaultPluginManager { + public GrpcPluginManager() { + } + + public GrpcPluginManager(Path... pluginsRoots) { + super(pluginsRoots); + } + + public GrpcPluginManager(List pluginsRoots) { + super(pluginsRoots); + } + + @Override + protected PluginDescriptorFinder createPluginDescriptorFinder() { + return new ClasspathPluginPropertiesFinder(); + } + + @Override + protected PluginLoader createPluginLoader() { + return super.createPluginLoader(); + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java index 0dc05a2d59..13057f5795 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java @@ -29,6 +29,7 @@ import com.azure.storage.blob.BlobServiceClient; import com.azure.storage.blob.BlobServiceClientBuilder; import com.azure.storage.blob.models.BlobProperties; +import org.pf4j.Extension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,6 +57,7 @@ * 2) If you have different endpoints or sas tokens or containers across * your requests, your fetchKey will be the complete SAS url pointing to the blob. 
*/ +@Extension public class AZBlobFetcher extends AbstractFetcher implements Initializable { public AZBlobFetcher() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java index bc3c4cddd3..08ac378fee 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java @@ -27,6 +27,7 @@ import java.util.Date; import java.util.Map; +import org.pf4j.Extension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,6 +46,7 @@ import org.apache.tika.pipes.fetcher.AbstractFetcher; import org.apache.tika.pipes.fetcher.fs.config.FileSystemFetcherConfig; +@Extension public class FileSystemFetcher extends AbstractFetcher implements Initializable { public FileSystemFetcher() { } diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java index 75f89527e8..271eedb68d 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java @@ -27,6 +27,7 @@ import com.google.cloud.storage.BlobId; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; +import org.pf4j.Extension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,6 +47,7 @@ /** * Fetches files from google cloud storage. Must set projectId and bucket via the config. 
*/ +@Extension public class GCSFetcher extends AbstractFetcher implements Initializable { public GCSFetcher() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java index 893e6c77b7..f2ef38a430 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java @@ -58,6 +58,7 @@ import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.impl.conn.ConnectionShutdownException; import org.apache.http.util.EntityUtils; +import org.pf4j.Extension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -88,6 +89,7 @@ /** * Based on Apache httpclient */ +@Extension public class HttpFetcher extends AbstractFetcher implements Initializable, RangeFetcher { public HttpFetcher() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml index a7ef0b44b9..6960c987e2 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml @@ -41,6 +41,7 @@ 9.40 1.16.2 2.0.10 + 2.42.0 @@ -102,6 +103,11 @@ kotlin-stdlib ${kotlin-stdlib.version} + + com.google.cloud + google-cloud-storage + ${google-cloud-storage.version} + org.junit.jupiter junit-jupiter-engine @@ -185,7 +191,23 @@ - + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.1 + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + + + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java 
index ab4a139a0f..fd9030026e 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java @@ -41,6 +41,7 @@ import com.amazonaws.services.s3.model.AmazonS3Exception; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.S3Object; +import org.pf4j.Extension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,6 +67,7 @@ * The bucket must be specified via the tika-config or before * initialization, and the fetch key is "path/to/my_file.pdf". */ +@Extension public class S3Fetcher extends AbstractFetcher implements Initializable, RangeFetcher { public S3Fetcher() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java index 7692516cd0..9421d702b9 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java @@ -21,6 +21,8 @@ import java.net.URL; import java.util.Locale; +import org.pf4j.Extension; + import org.apache.tika.exception.TikaException; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; @@ -33,6 +35,7 @@ * Please use the FileSystemFetcher for that. If you need more advanced control (passwords, * timeouts, proxies, etc), please use the tika-fetcher-http module. 
*/ +@Extension public class UrlFetcher extends AbstractFetcher { @Override diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcherPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcherPlugin.java new file mode 100644 index 0000000000..ab73c043bc --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcherPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.fetcher.url; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class UrlFetcherPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(UrlFetcherPlugin.class); + @Override + public void start() { + LOG.info("Starting"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting"); + super.delete(); + } +} From 613e3f5e415db7932b28cd6b2b19ebe09c41516b Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Sat, 24 Aug 2024 10:45:17 -0500 Subject: [PATCH 06/11] TIKA-4272: switch to maven dependency plugin and assembly plugin to replace shaded jar --- tika-pipes/tika-fetchers/pom.xml | 3 +- .../tika-fetcher-az-blob/pom.xml | 138 +++++++---------- .../src/assembly/assembly.xml | 30 ++++ .../tika-fetchers/tika-fetcher-fs/pom.xml | 131 +++++++--------- .../tika-fetcher-fs/src/assembly/assembly.xml | 30 ++++ .../tika-fetchers/tika-fetcher-gcs/pom.xml | 138 +++++++---------- .../src/assembly/assembly.xml | 30 ++++ .../tika-fetchers/tika-fetcher-http/pom.xml | 64 +++----- .../src/assembly/assembly.xml | 30 ++++ .../tika-fetcher-microsoft-graph/pom.xml | 103 ++++-------- .../tika-fetchers/tika-fetcher-s3/pom.xml | 146 ++++++++---------- .../tika-fetcher-s3/src/assembly/assembly.xml | 30 ++++ .../tika-fetchers/tika-fetcher-url/pom.xml | 128 +++++++-------- .../src/assembly/assembly.xml | 30 ++++ 14 files changed, 511 insertions(+), 520 deletions(-) create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/assembly/assembly.xml create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-fs/src/assembly/assembly.xml create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-gcs/src/assembly/assembly.xml create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-http/src/assembly/assembly.xml create mode 100644 
tika-pipes/tika-fetchers/tika-fetcher-s3/src/assembly/assembly.xml create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-url/src/assembly/assembly.xml diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml index fa86d9db5f..7c1c78a0d0 100644 --- a/tika-pipes/tika-fetchers/pom.xml +++ b/tika-pipes/tika-fetchers/pom.xml @@ -17,7 +17,8 @@ specific language governing permissions and limitations under the License. --> - + org.apache.tika tika-pipes diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml index 903007cf2e..d262f7a118 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml @@ -17,90 +17,66 @@ specific language governing permissions and limitations under the License. --> - - - tika-fetchers - org.apache.tika - 3.0.0-SNAPSHOT - - 4.0.0 + + + tika-fetchers + org.apache.tika + 3.0.0-SNAPSHOT + + 4.0.0 - tika-fetcher-az-blob - Apache Tika Azure Blob fetcher + tika-fetcher-az-blob + Apache Tika Azure Blob fetcher - - - com.azure - azure-storage-blob - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.apache.tika.pipes.fetcher.azblob - - - - - - - test-jar - - - - - - maven-shade-plugin - ${maven.shade.version} - - - package - - shade - - - - false - - - - - *:* - - META-INF/* - LICENSE.txt - NOTICE.txt - - - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - META-INF/DEPENDENCIES - target/classes/META-INF/DEPENDENCIES - - - - - - - - - + + + com.azure + azure-storage-blob + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.1 + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + compile + tika-core + + + + + + maven-assembly-plugin + + + ${project.basedir}/src/assembly/assembly.xml + + false + + + + make-assembly + package + + single + + + + + + 
3.0.0-BETA2-rc1 diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/assembly/assembly.xml new file mode 100644 index 0000000000..d614dfc367 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/assembly/assembly.xml @@ -0,0 +1,30 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/lib + /lib + + + ${project.build.directory} + /lib + + ${project.artifactId}-${project.version}.jar + + + + ${project.basedir}/src/main/resources + / + + plugin.properties + + + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml index e4bb74b24a..2ac5bed9bf 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml @@ -17,86 +17,63 @@ specific language governing permissions and limitations under the License. --> - - - tika-fetchers - org.apache.tika - 3.0.0-SNAPSHOT - - 4.0.0 + + + tika-fetchers + org.apache.tika + 3.0.0-SNAPSHOT + + 4.0.0 - tika-fetcher-fs - Apache Tika FS Fetcher - Apache Tika Pipes Fetcher for Local File System + tika-fetcher-fs + Apache Tika FS Fetcher + Apache Tika Pipes Fetcher for Local File System - - + + - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.apache.tika.pipes.fetcher.s3 - - - - - - - test-jar - - - - - - maven-shade-plugin - ${maven.shade.version} - - - package - - shade - - - - false - - - - - *:* - - META-INF/* - LICENSE.txt - NOTICE.txt - - - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - META-INF/DEPENDENCIES - target/classes/META-INF/DEPENDENCIES - - - - - - - - - + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.1 + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + compile + tika-core + + + + + + maven-assembly-plugin + + + 
${project.basedir}/src/assembly/assembly.xml + + false + + + + make-assembly + package + + single + + + + + + 3.0.0-BETA2-rc1 diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/assembly/assembly.xml new file mode 100644 index 0000000000..d614dfc367 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/assembly/assembly.xml @@ -0,0 +1,30 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/lib + /lib + + + ${project.build.directory} + /lib + + ${project.artifactId}-${project.version}.jar + + + + ${project.basedir}/src/main/resources + / + + plugin.properties + + + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml index f975d145e8..467f51d572 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml @@ -17,90 +17,66 @@ specific language governing permissions and limitations under the License. 
--> - - - tika-fetchers - org.apache.tika - 3.0.0-SNAPSHOT - - 4.0.0 + + + tika-fetchers + org.apache.tika + 3.0.0-SNAPSHOT + + 4.0.0 - tika-fetcher-gcs - Apache Tika Google Cloud Storage fetcher + tika-fetcher-gcs + Apache Tika Google Cloud Storage fetcher - - - com.google.cloud - google-cloud-storage - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.apache.tika.pipes.fetcher.gcs - - - - - - - test-jar - - - - - - maven-shade-plugin - ${maven.shade.version} - - - package - - shade - - - - false - - - - - *:* - - META-INF/* - LICENSE.txt - NOTICE.txt - - - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - META-INF/DEPENDENCIES - target/classes/META-INF/DEPENDENCIES - - - - - - - - - + + + com.google.cloud + google-cloud-storage + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.1 + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + compile + tika-core + + + + + + maven-assembly-plugin + + + ${project.basedir}/src/assembly/assembly.xml + + false + + + + make-assembly + package + + single + + + + + + 3.0.0-BETA2-rc1 diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/assembly/assembly.xml new file mode 100644 index 0000000000..d614dfc367 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/assembly/assembly.xml @@ -0,0 +1,30 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/lib + /lib + + + ${project.build.directory} + /lib + + ${project.artifactId}-${project.version}.jar + + + + ${project.basedir}/src/main/resources + / + + plugin.properties + + + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml index 4d0761dd50..59e6ff5c69 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml +++ 
b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml @@ -17,7 +17,8 @@ specific language governing permissions and limitations under the License. --> - + tika-fetchers org.apache.tika @@ -56,61 +57,38 @@ org.apache.maven.plugins - maven-jar-plugin - - - - org.apache.tika.pipes.fetcher.http - - - + maven-dependency-plugin + 3.6.1 + copy-dependencies + package - test-jar + copy-dependencies + + ${project.build.directory}/lib + compile + tika-core + - maven-shade-plugin - ${maven.shade.version} + maven-assembly-plugin + + + ${project.basedir}/src/assembly/assembly.xml + + false + + make-assembly package - shade + single - - - false - - - - - *:* - - META-INF/* - LICENSE.txt - NOTICE.txt - - - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - META-INF/DEPENDENCIES - target/classes/META-INF/DEPENDENCIES - - - diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/src/assembly/assembly.xml new file mode 100644 index 0000000000..d614dfc367 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/assembly/assembly.xml @@ -0,0 +1,30 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/lib + /lib + + + ${project.build.directory} + /lib + + ${project.artifactId}-${project.version}.jar + + + + ${project.basedir}/src/main/resources + / + + plugin.properties + + + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml index 6960c987e2..135e0fc008 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml @@ -35,7 +35,7 @@ UTF-8 1.13.2 6.13.0 - 1.1.1 + 1.2.0 3.3.1 5.12.0 9.40 @@ -64,11 +64,6 @@ - - ${project.groupId} - tika-core - ${project.version} - com.microsoft.graph microsoft-graph @@ -107,16 +102,16 @@ com.google.cloud 
google-cloud-storage ${google-cloud-storage.version} - - - org.junit.jupiter - junit-jupiter-engine - test - - - org.mockito - mockito-core - test + + + io.opentelemetry + opentelemetry-api + + + io.opentelemetry + opentelemetry-context + + org.mockito @@ -129,82 +124,38 @@ org.apache.maven.plugins - maven-jar-plugin - - - - org.apache.tika.pipes.fetcher.s3 - - - - - - - test-jar - - - - - - maven-shade-plugin - ${maven.shade.version} + maven-dependency-plugin + 3.6.1 + copy-dependencies package - shade + copy-dependencies - - false - - - - - *:* - - META-INF/* - LICENSE.txt - NOTICE.txt - - - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - META-INF/DEPENDENCIES - target/classes/META-INF/DEPENDENCIES - - - META-INF/extensions.idx - target/classes/META-INF/extensions.idx - - + ${project.build.directory}/lib + compile + tika-core - org.apache.maven.plugins - maven-dependency-plugin - 3.6.1 + maven-assembly-plugin + + + ${project.basedir}/src/assembly/assembly.xml + + false + - copy-dependencies + make-assembly package - copy-dependencies + single - - ${project.build.directory}/lib - diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml index 2189451e19..e24aba28b5 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml @@ -17,94 +17,70 @@ specific language governing permissions and limitations under the License. 
--> - - - tika-fetchers - org.apache.tika - 3.0.0-SNAPSHOT - - 4.0.0 + + + tika-fetchers + org.apache.tika + 3.0.0-SNAPSHOT + + 4.0.0 - tika-fetcher-s3 - Apache Tika S3 fetcher + tika-fetcher-s3 + Apache Tika S3 fetcher - - - com.amazonaws - aws-java-sdk-s3 - - - commons-io - commons-io - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.apache.tika.pipes.fetcher.s3 - - - - - - - test-jar - - - - - - maven-shade-plugin - ${maven.shade.version} - - - package - - shade - - - - false - - - - - *:* - - META-INF/* - LICENSE.txt - NOTICE.txt - - - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - META-INF/DEPENDENCIES - target/classes/META-INF/DEPENDENCIES - - - - - - - - - + + + com.amazonaws + aws-java-sdk-s3 + + + commons-io + commons-io + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.1 + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + compile + tika-core + + + + + + maven-assembly-plugin + + + ${project.basedir}/src/assembly/assembly.xml + + false + + + + make-assembly + package + + single + + + + + + 3.0.0-BETA2-rc1 diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/assembly/assembly.xml new file mode 100644 index 0000000000..d614dfc367 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/assembly/assembly.xml @@ -0,0 +1,30 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/lib + /lib + + + ${project.build.directory} + /lib + + ${project.artifactId}-${project.version}.jar + + + + ${project.basedir}/src/main/resources + / + + plugin.properties + + + + diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml index 860f815fc9..05d1290078 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml +++ 
b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml @@ -17,84 +17,60 @@ specific language governing permissions and limitations under the License. --> - - - tika-fetchers - org.apache.tika - 3.0.0-SNAPSHOT - - 4.0.0 + + + tika-fetchers + org.apache.tika + 3.0.0-SNAPSHOT + + 4.0.0 - tika-fetcher-url - Apache Tika URL Fetcher - Apache Tika Pipes Fetcher for HTTP URLs - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.apache.tika.pipes.fetcher.s3 - - - - - - - test-jar - - - - - - maven-shade-plugin - ${maven.shade.version} - - - package - - shade - - - - false - - - - - *:* - - META-INF/* - LICENSE.txt - NOTICE.txt - - - - - - META-INF/LICENSE - target/classes/META-INF/LICENSE - - - META-INF/NOTICE - target/classes/META-INF/NOTICE - - - META-INF/DEPENDENCIES - target/classes/META-INF/DEPENDENCIES - - - - - - - - - + tika-fetcher-url + Apache Tika URL Fetcher + Apache Tika Pipes Fetcher for HTTP URLs + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.1 + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + compile + tika-core + + + + + + maven-assembly-plugin + + + ${project.basedir}/src/assembly/assembly.xml + + false + + + + make-assembly + package + + single + + + + + + 3.0.0-BETA2-rc1 diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-url/src/assembly/assembly.xml new file mode 100644 index 0000000000..d614dfc367 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/assembly/assembly.xml @@ -0,0 +1,30 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/lib + /lib + + + ${project.build.directory} + /lib + + ${project.artifactId}-${project.version}.jar + + + + ${project.basedir}/src/main/resources + / + + plugin.properties + + + + From a41712aec80bbb8015552e45fc8fe550df6dc56b Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Sat, 24 Aug 2024 10:46:35 -0500 Subject: [PATCH 07/11] TIKA-4272: 
add a couple missing files from previous commit --- .../fetcher/url/config/UrlFetcherConfig.java | 28 +++++++++++++++++ .../src/assembly/assembly.xml | 30 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java create mode 100644 tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/assembly/assembly.xml diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java new file mode 100644 index 0000000000..9317c20fd0 --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.fetcher.url.config; + +import org.apache.tika.pipes.fetcher.config.FetcherConfig; + +public class UrlFetcherConfig extends FetcherConfig { + public static final String PLUGIN_ID = "url-fetcher"; + + @Override + public String getPluginId() { + return PLUGIN_ID; + } +} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/assembly/assembly.xml new file mode 100644 index 0000000000..d614dfc367 --- /dev/null +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/assembly/assembly.xml @@ -0,0 +1,30 @@ + + dependencies-zip + + zip + + false + + + ${project.build.directory}/lib + /lib + + + ${project.build.directory} + /lib + + ${project.artifactId}-${project.version}.jar + + + + ${project.basedir}/src/main/resources + / + + plugin.properties + + + + From b7613c83ff98e4a2237067a745717a93a207f6ad Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Sat, 24 Aug 2024 11:56:22 -0500 Subject: [PATCH 08/11] TIKA-4272: get more of the pf4j changes and removal of shade plugin changes in --- tika-core/pom.xml | 9 +- .../pipes/fetcher/config/FetcherConfig.java | 13 -- .../pipes/fetcher/fs/FileSystemFetcher.java | 182 ------------------ .../tika/pipes/fetcher/url/UrlFetcher.java | 53 ----- .../fetcher/url/config/UrlFetcherConfig.java | 28 --- .../tika/config/TikaPipesConfigTest.java | 14 -- .../fetcher/fs/FileSystemFetcherTest.java | 56 ------ tika-grpc/pom.xml | 5 + .../tika/pipes/grpc/TikaGrpcServer.java | 12 -- .../tika/pipes/grpc/TikaGrpcServerImpl.java | 12 +- .../grpc/exception/TikaGrpcException.java | 24 +++ .../ClasspathPluginPropertiesFinder.java | 16 ++ .../pipes/grpc/plugin/GrpcPluginManager.java | 47 +++++ .../pipes/grpc/ExpiringFetcherStoreTest.java | 41 ++-- ...BiDirectionalStreamingIntegrationTest.java | 4 +- .../tika/pipes/grpc/TikaGrpcServerTest.java | 34 +++- tika-pipes/tika-fetchers/pom.xml | 2 +- 
.../tika-fetcher-az-blob/pom.xml | 2 +- .../src/{ => main}/assembly/assembly.xml | 0 .../azblob/config/AZBlobFetcherConfig.java | 2 +- .../pom.xml | 4 +- .../src/main}/assembly/assembly.xml | 0 .../pipes/fetcher/fs/FileSystemFetcher.java | 0 .../fetcher/fs/FileSystemFetcherPlugin.java | 0 .../fs/config/FileSystemFetcherConfig.java | 2 +- .../src/main/resources/plugin.properties | 2 +- .../fs/config/FileSystemFetcherConfig.java | 49 ----- .../tika-fetchers/tika-fetcher-gcs/pom.xml | 2 +- .../src/{ => main}/assembly/assembly.xml | 0 .../fetcher/gcs/config/GCSFetcherConfig.java | 2 +- .../tika-fetchers/tika-fetcher-http/pom.xml | 2 +- .../src/{ => main}/assembly/assembly.xml | 0 .../http/config/HttpFetcherConfig.java | 2 +- .../tika-fetcher-microsoft-graph/pom.xml | 2 +- .../src/{ => main}/assembly/assembly.xml | 0 .../config/MicrosoftGraphFetcherConfig.java | 2 +- .../tika-fetchers/tika-fetcher-s3/pom.xml | 2 +- .../src/{ => main}/assembly/assembly.xml | 0 .../fetcher/s3/config/S3FetcherConfig.java | 2 +- .../tika-fetchers/tika-fetcher-url/pom.xml | 2 +- .../src/{ => main}/assembly/assembly.xml | 0 .../fetcher/url/config/UrlFetcherConfig.java | 2 +- .../server/core/FetcherStreamFactory.java | 2 +- 43 files changed, 176 insertions(+), 459 deletions(-) delete mode 100644 tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java delete mode 100644 tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java delete mode 100644 tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java delete mode 100644 tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java rename tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/{ => main}/assembly/assembly.xml (100%) rename tika-pipes/tika-fetchers/{tika-fetcher-fs => tika-fetcher-file-system}/pom.xml (95%) rename tika-pipes/tika-fetchers/{tika-fetcher-fs/src => tika-fetcher-file-system/src/main}/assembly/assembly.xml (100%) rename 
tika-pipes/tika-fetchers/{tika-fetcher-fs => tika-fetcher-file-system}/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java (100%) rename tika-pipes/tika-fetchers/{tika-fetcher-fs => tika-fetcher-file-system}/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java (100%) rename {tika-core => tika-pipes/tika-fetchers/tika-fetcher-file-system}/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java (94%) rename tika-pipes/tika-fetchers/{tika-fetcher-fs => tika-fetcher-file-system}/src/main/resources/plugin.properties (97%) delete mode 100644 tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java rename tika-pipes/tika-fetchers/tika-fetcher-gcs/src/{ => main}/assembly/assembly.xml (100%) rename tika-pipes/tika-fetchers/tika-fetcher-http/src/{ => main}/assembly/assembly.xml (100%) rename tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/{ => main}/assembly/assembly.xml (100%) rename tika-pipes/tika-fetchers/tika-fetcher-s3/src/{ => main}/assembly/assembly.xml (100%) rename tika-pipes/tika-fetchers/tika-fetcher-url/src/{ => main}/assembly/assembly.xml (100%) diff --git a/tika-core/pom.xml b/tika-core/pom.xml index 172552a174..f5a9483c80 100644 --- a/tika-core/pom.xml +++ b/tika-core/pom.xml @@ -19,7 +19,8 @@ under the License. 
--> - + 4.0.0 @@ -261,9 +262,9 @@ -Xmx256m 240000 max - true - - + true + + org.apache.maven.plugins maven-project-info-reports-plugin diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java index 241211f36e..e9df451891 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java +++ b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java @@ -16,9 +16,6 @@ */ package org.apache.tika.pipes.fetcher.config; -import java.io.IOException; -import java.util.Properties; - public abstract class FetcherConfig { private String fetcherId; @@ -32,14 +29,4 @@ public FetcherConfig setFetcherId(String fetcherId) { this.fetcherId = fetcherId; return this; } - - public static String getPluginIdForFetcherConfig(Class clazz) { - Properties properties = new Properties(); - try { - properties.load(clazz.getResourceAsStream("/plugin.properties")); - return properties.getProperty("plugin.id"); - } catch (IOException e) { - throw new IllegalStateException("Cannot find plugin.properties for plugin", e); - } - } } diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java deleted file mode 100644 index bc3c4cddd3..0000000000 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.fetcher.fs; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.attribute.BasicFileAttributes; -import java.nio.file.attribute.FileTime; -import java.util.Date; -import java.util.Map; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.tika.config.Field; -import org.apache.tika.config.Initializable; -import org.apache.tika.config.InitializableProblemHandler; -import org.apache.tika.config.Param; -import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.exception.TikaException; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.FileSystem; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.Property; -import org.apache.tika.metadata.TikaCoreProperties; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.pipes.fetcher.AbstractFetcher; -import org.apache.tika.pipes.fetcher.fs.config.FileSystemFetcherConfig; - -public class FileSystemFetcher extends AbstractFetcher implements Initializable { - public FileSystemFetcher() { - } - - public FileSystemFetcher(FileSystemFetcherConfig fileSystemFetcherConfig) { - setBasePath(fileSystemFetcherConfig.getBasePath()); - setExtractFileSystemMetadata(fileSystemFetcherConfig.isExtractFileSystemMetadata()); - } - - private static final Logger LOG = LoggerFactory.getLogger(FileSystemFetcher.class); - - //Warning! 
basePath can be null! - private Path basePath = null; - - private boolean extractFileSystemMetadata = false; - - static boolean isDescendant(Path root, Path descendant) { - return descendant.toAbsolutePath().normalize() - .startsWith(root.toAbsolutePath().normalize()); - } - - @Override - public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws IOException, TikaException { - if (fetchKey.contains("\u0000")) { - throw new IllegalArgumentException("Path must not contain 'u0000'. " + - "Please review the life decisions that led you to requesting " + - "a file name with this character in it."); - } - Path p = null; - if (basePath != null) { - p = basePath.resolve(fetchKey); - if (!p.toRealPath().startsWith(basePath.toRealPath())) { - throw new IllegalArgumentException( - "fetchKey must resolve to be a descendant of the 'basePath'"); - } - } else { - p = Paths.get(fetchKey); - } - - metadata.set(TikaCoreProperties.SOURCE_PATH, fetchKey); - updateFileSystemMetadata(p, metadata); - - if (!Files.isRegularFile(p)) { - if (basePath != null && !Files.isDirectory(basePath)) { - throw new IOException("BasePath is not a directory: " + basePath); - } else { - throw new FileNotFoundException(p.toAbsolutePath().toString()); - } - } - - return TikaInputStream.get(p, metadata); - } - - private void updateFileSystemMetadata(Path p, Metadata metadata) throws IOException { - if (! extractFileSystemMetadata) { - return; - } - BasicFileAttributes attrs = Files.readAttributes(p, BasicFileAttributes.class); - updateFileTime(FileSystem.CREATED, attrs.creationTime(), metadata); - updateFileTime(FileSystem.MODIFIED, attrs.lastModifiedTime(), metadata); - updateFileTime(FileSystem.ACCESSED, attrs.lastAccessTime(), metadata); - //TODO extract owner or group? 
- } - - private void updateFileTime(Property property, FileTime fileTime, Metadata metadata) { - if (fileTime == null) { - return; - } - metadata.set(property, new Date(fileTime.toMillis())); - } - - /** - * - * @return the basePath or null if no base path was set - */ - public Path getBasePath() { - return basePath; - } - - /** - * Default behavior si that clients will send in relative paths, this - * must be set to allow this fetcher to fetch the - * full path. - * - * @param basePath - */ - @Field - public void setBasePath(String basePath) { - this.basePath = Paths.get(basePath); - } - - /** - * Extract file system metadata (created, modified, accessed) when fetching file. - * The default is false. - * - * @param extractFileSystemMetadata - */ - @Field - public void setExtractFileSystemMetadata(boolean extractFileSystemMetadata) { - this.extractFileSystemMetadata = extractFileSystemMetadata; - } - - @Override - public void initialize(Map params) throws TikaConfigException { - //no-op - } - - @Override - public void checkInitialization(InitializableProblemHandler problemHandler) - throws TikaConfigException { - if (basePath == null || basePath.toString().trim().length() == 0) { - LOG.warn("'basePath' has not been set. " + - "This means that client code or clients can read from any file that this " + - "process has permissions to read. If you are running tika-server, make " + - "absolutely certain that you've locked down " + - "access to tika-server and file-permissions for the tika-server process."); - return; - } - if (basePath.toString().startsWith("http://")) { - throw new TikaConfigException("FileSystemFetcher only works with local file systems. " + - " Please use the tika-fetcher-http module for http calls"); - } else if (basePath.toString().startsWith("ftp://")) { - throw new TikaConfigException("FileSystemFetcher only works with local file systems. 
" + - " Please consider contributing an ftp fetcher module"); - } else if (basePath.toString().startsWith("s3://")) { - throw new TikaConfigException("FileSystemFetcher only works with local file systems. " + - " Please use the tika-fetcher-s3 module"); - } - - if (basePath.toAbsolutePath().toString().contains("\u0000")) { - throw new TikaConfigException( - "base path must not contain \u0000. " + "Seriously, what were you thinking?"); - } - } -} diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java deleted file mode 100644 index 7692516cd0..0000000000 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/UrlFetcher.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.tika.pipes.fetcher.url; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.util.Locale; - -import org.apache.tika.exception.TikaException; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.pipes.fetcher.AbstractFetcher; - -/** - * Simple fetcher for URLs. This simply calls {@link TikaInputStream#get(URL)}. - * This intentionally does not support fetching for files. - * Please use the FileSystemFetcher for that. If you need more advanced control (passwords, - * timeouts, proxies, etc), please use the tika-fetcher-http module. - */ -public class UrlFetcher extends AbstractFetcher { - - @Override - public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws IOException, TikaException { - if (fetchKey.contains("\u0000")) { - throw new IllegalArgumentException("URL must not contain \u0000. " + - "Please review the life decisions that led you to requesting " + - "a URL with this character in it."); - } - if (fetchKey.toLowerCase(Locale.US).trim().startsWith("file:")) { - throw new IllegalArgumentException( - "The UrlFetcher does not fetch from file shares; " + - "please use the FileSystemFetcher"); - } - return TikaInputStream.get(new URL(fetchKey), metadata); - } - -} diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java b/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java deleted file mode 100644 index 9317c20fd0..0000000000 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.fetcher.url.config; - -import org.apache.tika.pipes.fetcher.config.FetcherConfig; - -public class UrlFetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = "url-fetcher"; - - @Override - public String getPluginId() { - return PLUGIN_ID; - } -} diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java index 3ea1e538ce..0f1ab2c906 100644 --- a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java +++ b/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java @@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.List; import org.junit.jupiter.api.Test; @@ -34,24 +33,11 @@ import org.apache.tika.pipes.async.MockReporter; import org.apache.tika.pipes.emitter.Emitter; import org.apache.tika.pipes.emitter.EmitterManager; -import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; -import org.apache.tika.pipes.fetcher.fs.FileSystemFetcher; import org.apache.tika.pipes.pipesiterator.PipesIterator; public class TikaPipesConfigTest extends AbstractTikaConfigTest { //this handles tests for the newer pipes type configs. 
- - @Test - public void testFetchers() throws Exception { - FetcherManager m = FetcherManager.load(getConfigFilePath("fetchers-config.xml")); - Fetcher f1 = m.getFetcher("fs1"); - assertEquals(Paths.get("/my/base/path1"), ((FileSystemFetcher) f1).getBasePath()); - - Fetcher f2 = m.getFetcher("fs2"); - assertEquals(Paths.get("/my/base/path2"), ((FileSystemFetcher) f2).getBasePath()); - } - @Test public void testDuplicateFetchers() throws Exception { //can't have two fetchers with the same name diff --git a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java b/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java deleted file mode 100644 index 4d7e7068dd..0000000000 --- a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherTest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.tika.pipes.fetcher.fs; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.file.InvalidPathException; -import java.nio.file.Path; -import java.nio.file.Paths; - -import org.junit.jupiter.api.Test; - -import org.apache.tika.config.InitializableProblemHandler; - - -public class FileSystemFetcherTest { - - @Test - public void testDescendant() throws Exception { - - Path root = Paths.get("/ab/cd/"); - Path descendant = root.resolve("ef/gh/ij.pdf"); - assertTrue(FileSystemFetcher.isDescendant(root, descendant)); - - descendant = Paths.get("/cd/ef.pdf"); - assertFalse(FileSystemFetcher.isDescendant(root, descendant)); - - descendant = root.resolve("../../ij.pdf"); - assertFalse(FileSystemFetcher.isDescendant(root, descendant)); - } - - @Test - public void testNullByte() throws Exception { - FileSystemFetcher f = new FileSystemFetcher(); - assertThrows(InvalidPathException.class, () -> { - f.setBasePath("bad\u0000path"); - f.checkInitialization(InitializableProblemHandler.IGNORE); - }); - } -} diff --git a/tika-grpc/pom.xml b/tika-grpc/pom.xml index e8bdfeb66b..99a8517095 100644 --- a/tika-grpc/pom.xml +++ b/tika-grpc/pom.xml @@ -223,6 +223,11 @@ tika-fetcher-http ${project.version} + + org.apache.tika + tika-fetcher-file-system + ${project.version} + com.fasterxml.jackson.module jackson-module-jsonSchema diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java index 05a1efeae4..d08754eedc 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java @@ -35,13 +35,11 @@ import io.grpc.protobuf.services.HealthStatusManager; import io.grpc.protobuf.services.ProtoReflectionService; import 
org.pf4j.PluginManager; -import org.pf4j.PluginWrapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.tika.config.TikaConfig; import org.apache.tika.config.TikaConfigSerializer; -import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.grpc.plugin.GrpcPluginManager; /** @@ -110,16 +108,6 @@ public void start() throws Exception { pluginManager.loadPlugins(); LOGGER.info("Loaded {} plugins", pluginManager.getPlugins().size()); pluginManager.startPlugins(); - for (PluginWrapper plugin : pluginManager.getStartedPlugins()) { - LOGGER.info("Add-in " + plugin.getPluginId() + " : " + plugin.getDescriptor() + " has started."); - for (Class extension : pluginManager.getExtensionClasses(plugin.getPluginId())) { - LOGGER.info(" Extension " + extension + " has been registered -- {}", extension.isAssignableFrom(Fetcher.class)); - LOGGER.info(" or -- {}", Fetcher.class.isAssignableFrom(extension)); - } - } - for (PluginWrapper plugin : pluginManager.getUnresolvedPlugins()) { - LOGGER.warn("Add-in " + plugin.getPluginId() + " : " + plugin.getDescriptor() + " is unresolved."); - } File tikaConfigFile = new File(tikaConfigXml.getAbsolutePath()); healthStatusManager.setStatus(TikaGrpcServer.class.getSimpleName(), ServingStatus.SERVING); server = Grpc diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java index 1f4a81fa8d..46e74d71aa 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java @@ -220,7 +220,7 @@ private void fetchAndParseImpl(FetchAndParseRequest request, FetcherConfig fetcherConfig = expiringFetcherStore.getFetcherAndLogAccess(request.getFetcherId()); if (fetcherConfig == null) { - throw new RuntimeException( + throw new TikaGrpcException( "Could not find fetcher with name " + request.getFetcherId()); } 
Metadata tikaMetadata = new Metadata(); @@ -258,7 +258,7 @@ private void fetchAndParseImpl(FetchAndParseRequest request, } responseObserver.onNext(fetchReplyBuilder.build()); } catch (IOException e) { - throw new RuntimeException(e); + throw new TikaGrpcException(e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } @@ -276,7 +276,7 @@ public void saveFetcher(SaveFetcherRequest request, saveFetcher(request.getFetcherId(), request.getPluginId(), fetcherConfigMap, tikaParamsMap); updateTikaConfig(); } catch (Exception e) { - throw new RuntimeException(e); + throw new TikaGrpcException(e); } responseObserver.onNext(reply); responseObserver.onCompleted(); @@ -389,7 +389,7 @@ public void deleteFetcher(DeleteFetcherRequest request, try { updateTikaConfig(); } catch (Exception e) { - throw new RuntimeException(e); + throw new TikaGrpcException(e); } } responseObserver.onNext(DeleteFetcherReply.newBuilder().setSuccess(successfulDelete).build()); @@ -404,7 +404,7 @@ public void getFetcherConfigJsonSchema(GetFetcherConfigJsonSchemaRequest request JsonSchema jsonSchema = JSON_SCHEMA_GENERATOR.generateSchema(fetcher.getClass()); builder.setFetcherConfigJsonSchema(OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema)); } catch (JsonProcessingException e) { - throw new RuntimeException("Could not create json schema for fetcher with plugin ID " + request.getPluginId(), e); + throw new TikaGrpcException("Could not create json schema for fetcher with plugin ID " + request.getPluginId(), e); } responseObserver.onNext(builder.build()); responseObserver.onCompleted(); @@ -414,7 +414,7 @@ private Fetcher getFetcher(String pluginId) { return pluginManager.getExtensions(Fetcher.class, pluginId) .stream() .findFirst() - .orElseThrow(); + .orElseThrow(() -> new TikaGrpcException("Could not find Fetcher extension for plugin " + pluginId)); } @Override diff --git 
a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java index 383eedb32e..21a95f1358 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/exception/TikaGrpcException.java @@ -1,7 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.tika.pipes.grpc.exception; public class TikaGrpcException extends RuntimeException { + public TikaGrpcException(Throwable cause) { + super(cause); + } + public TikaGrpcException(String message, Throwable cause) { super(message, cause); } + + public TikaGrpcException(String message) { + super(message); + } } diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java index 472c1c975c..2e9552d36c 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.tika.pipes.grpc.plugin; import java.nio.file.Path; diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java index 21098dab5b..53faa96b09 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.tika.pipes.grpc.plugin; import java.nio.file.Path; @@ -6,8 +22,15 @@ import org.pf4j.DefaultPluginManager; import org.pf4j.PluginDescriptorFinder; import org.pf4j.PluginLoader; +import org.pf4j.PluginWrapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.tika.pipes.fetcher.Fetcher; +import org.apache.tika.pipes.grpc.exception.TikaGrpcException; public class GrpcPluginManager extends DefaultPluginManager { + private static final Logger LOGGER = LoggerFactory.getLogger(GrpcPluginManager.class); public GrpcPluginManager() { } @@ -28,4 +51,28 @@ protected PluginDescriptorFinder createPluginDescriptorFinder() { protected PluginLoader createPluginLoader() { return super.createPluginLoader(); } + + @Override + public void loadPlugins() { + super.loadPlugins(); + LOGGER.info("Loaded {} plugins", getPlugins().size()); + } + + @Override + public void startPlugins() { + super.startPlugins(); + for (PluginWrapper plugin : getStartedPlugins()) { + LOGGER.info("Add-in " + plugin.getPluginId() + " : " + plugin.getDescriptor() + " has started."); + checkFetcherExtensions(plugin); + } + } + + private void checkFetcherExtensions(PluginWrapper plugin) { + for (Class extensionClass : getExtensionClasses(Fetcher.class, plugin.getPluginId())) { + if (!Fetcher.class.isAssignableFrom(extensionClass)) { + throw new TikaGrpcException("Something is wrong with the classpath. " + Fetcher.class.getName() + " should be assignable from " + extensionClass.getName() + ". 
Did tika-core accidentally get in your plugin lib?"); + } + LOGGER.info(" Extension " + extensionClass + " has been registered to plugin " + plugin.getPluginId()); + } + } } diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java index 09ce85f982..5bafa67922 100644 --- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java +++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java @@ -22,6 +22,7 @@ import java.time.Duration; import org.awaitility.Awaitility; +import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -35,31 +36,41 @@ class ExpiringFetcherStoreTest { @Test void createFetcher() { try (ExpiringFetcherStore expiringFetcherStore = new ExpiringFetcherStore(1, 5)) { - AbstractFetcher fetcher = new AbstractFetcher() { - @Override - public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) { - return null; - } - }; - fetcher.setName("nick"); - FetcherConfig config = new FetcherConfig() { - }; - expiringFetcherStore.createFetcher(fetcher, config); + FetcherConfig config = getFetcherConfig(); + String fetcherId = "nicksFetcherId"; + expiringFetcherStore.createFetcher(fetcherId, config); Assertions.assertNotNull(expiringFetcherStore - .getFetchers() - .get(fetcher.getName())); + .getFetcherConfigs() + .get(fetcherId)); Awaitility .await() .atMost(Duration.ofSeconds(60)) .until(() -> expiringFetcherStore - .getFetchers() - .get(fetcher.getName()) == null); + .getFetcherConfigs() + .get(fetcherId) == null); assertNull(expiringFetcherStore .getFetcherConfigs() - .get(fetcher.getName())); + .get(fetcherId)); } } + + @NotNull + private static FetcherConfig getFetcherConfig() { + AbstractFetcher fetcher = new AbstractFetcher() { + @Override + public InputStream fetch(String fetchKey, Metadata metadata, 
ParseContext parseContext) { + return null; + } + }; + fetcher.setPluginId("nicksPlugin"); + return new FetcherConfig() { + @Override + public String getPluginId() { + return fetcher.getPluginId(); + } + }; + } } diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/PipesBiDirectionalStreamingIntegrationTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/PipesBiDirectionalStreamingIntegrationTest.java index e78110abb1..cb4559bc58 100644 --- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/PipesBiDirectionalStreamingIntegrationTest.java +++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/PipesBiDirectionalStreamingIntegrationTest.java @@ -55,7 +55,7 @@ import org.apache.tika.SaveFetcherReply; import org.apache.tika.SaveFetcherRequest; import org.apache.tika.TikaGrpc; -import org.apache.tika.pipes.fetcher.http.HttpFetcher; +import org.apache.tika.pipes.fetcher.http.config.HttpFetcherConfig; /** * This test will start an HTTP server using jetty. @@ -155,7 +155,7 @@ void createHttpFetcher() throws Exception { SaveFetcherRequest saveFetcherRequest = SaveFetcherRequest .newBuilder() .setFetcherId(httpFetcherId) - .setFetcherClass(HttpFetcher.class.getName()) + .setPluginId(HttpFetcherConfig.PLUGIN_ID) .setFetcherConfigJson(OBJECT_MAPPER.writeValueAsString(ImmutableMap .builder() .put("requestTimeout", 30_000) diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java index d5aebed67f..ec9ace610f 100644 --- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java +++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java @@ -23,6 +23,7 @@ import java.io.File; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.nio.file.Paths; import java.time.Duration; import java.time.LocalDateTime; @@ -48,11 +49,12 @@ import io.grpc.stub.StreamObserver; import 
org.apache.commons.io.FileUtils; import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.pf4j.DefaultPluginManager; +import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -67,6 +69,8 @@ import org.apache.tika.TikaGrpc; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.fetcher.fs.FileSystemFetcher; +import org.apache.tika.pipes.fetcher.fs.config.FileSystemFetcherConfig; +import org.apache.tika.pipes.grpc.plugin.GrpcPluginManager; @ExtendWith(GrpcCleanupExtension.class) public class TikaGrpcServerTest { @@ -85,6 +89,22 @@ static void init() throws Exception { } static final int NUM_FETCHERS_TO_CREATE = 10; + static PluginManager pluginManager; + + @BeforeAll + static void loadPluginManager() throws Exception { + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. 
+ Path fetchersPath = Path.of("..", "tika-pipes", "tika-fetchers"); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + pluginManager = new GrpcPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + } + + @AfterAll + static void killPluginManager() { + pluginManager.stopPlugins(); + } @Test public void testFetcherCrud(Resources resources) throws Exception { @@ -94,7 +114,7 @@ public void testFetcherCrud(Resources resources) throws Exception { Server server = InProcessServerBuilder .forName(serverName) .directExecutor() - .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), new DefaultPluginManager())) + .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), pluginManager)) .build() .start(); resources.register(server, Duration.ofSeconds(10)); @@ -113,7 +133,7 @@ public void testFetcherCrud(Resources resources) throws Exception { SaveFetcherReply reply = blockingStub.saveFetcher(SaveFetcherRequest .newBuilder() .setFetcherId(fetcherId) - .setFetcherClass(FileSystemFetcher.class.getName()) + .setPluginId(FileSystemFetcherConfig.PLUGIN_ID) .setFetcherConfigJson(OBJECT_MAPPER.writeValueAsString(ImmutableMap .builder() .put("basePath", targetFolder) @@ -128,7 +148,7 @@ public void testFetcherCrud(Resources resources) throws Exception { SaveFetcherReply reply = blockingStub.saveFetcher(SaveFetcherRequest .newBuilder() .setFetcherId(fetcherId) - .setFetcherClass(FileSystemFetcher.class.getName()) + .setPluginId(FileSystemFetcherConfig.PLUGIN_ID) .setFetcherConfigJson(OBJECT_MAPPER.writeValueAsString(ImmutableMap .builder() .put("basePath", targetFolder) @@ -153,7 +173,7 @@ public void testFetcherCrud(Resources resources) throws Exception { .setFetcherId(fetcherId) .build()); assertEquals(fetcherId, getFetcherReply.getFetcherId()); - assertEquals(FileSystemFetcher.class.getName(), getFetcherReply.getFetcherClass()); + 
assertEquals(FileSystemFetcherConfig.PLUGIN_ID, getFetcherReply.getPluginId()); } // delete fetchers @@ -189,7 +209,7 @@ public void testBiStream(Resources resources) throws Exception { Server server = InProcessServerBuilder .forName(serverName) .directExecutor() - .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), new DefaultPluginManager())) + .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), pluginManager)) .build() .start(); resources.register(server, Duration.ofSeconds(10)); @@ -207,7 +227,7 @@ public void testBiStream(Resources resources) throws Exception { SaveFetcherReply reply = blockingStub.saveFetcher(SaveFetcherRequest .newBuilder() .setFetcherId(fetcherId) - .setFetcherClass(FileSystemFetcher.class.getName()) + .setPluginId(FileSystemFetcherConfig.PLUGIN_ID) .setFetcherConfigJson(OBJECT_MAPPER.writeValueAsString(ImmutableMap .builder() .put("basePath", targetFolder) diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml index 7c1c78a0d0..5e271073b8 100644 --- a/tika-pipes/tika-fetchers/pom.xml +++ b/tika-pipes/tika-fetchers/pom.xml @@ -38,7 +38,7 @@ tika-fetcher-gcs tika-fetcher-az-blob tika-fetcher-microsoft-graph - tika-fetcher-fs + tika-fetcher-file-system tika-fetcher-url diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml index d262f7a118..0f16786352 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml @@ -61,7 +61,7 @@ maven-assembly-plugin - ${project.basedir}/src/assembly/assembly.xml + src/main/assembly/assembly.xml false diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/assembly/assembly.xml similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/assembly/assembly.xml rename to 
tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/assembly/assembly.xml diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java index dbc03bb451..6113891247 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java @@ -20,7 +20,7 @@ public class AZBlobFetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = getPluginIdForFetcherConfig(AZBlobFetcherConfig.class); + public static final String PLUGIN_ID = "az-blob-fetcher"; @Override public String getPluginId() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-file-system/pom.xml similarity index 95% rename from tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml rename to tika-pipes/tika-fetchers/tika-fetcher-file-system/pom.xml index 2ac5bed9bf..935b0a0065 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-fs/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-file-system/pom.xml @@ -26,7 +26,7 @@ 4.0.0 - tika-fetcher-fs + tika-fetcher-file-system Apache Tika FS Fetcher Apache Tika Pipes Fetcher for Local File System @@ -58,7 +58,7 @@ maven-assembly-plugin - ${project.basedir}/src/assembly/assembly.xml + src/main/assembly/assembly.xml false diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/assembly/assembly.xml similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-fs/src/assembly/assembly.xml rename to tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/assembly/assembly.xml diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java rename to tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcher.java diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java rename to tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java similarity index 94% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java rename to tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java index 52ec0ecaab..ded6a9520c 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java @@ -20,7 +20,7 @@ public class FileSystemFetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = 
getPluginIdForFetcherConfig(FileSystemFetcherConfig.class); + public static final String PLUGIN_ID = "file-system-fetcher"; @Override public String getPluginId() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/resources/plugin.properties b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/resources/plugin.properties similarity index 97% rename from tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/resources/plugin.properties rename to tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/resources/plugin.properties index 41b443a518..3e6f63af22 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/resources/plugin.properties +++ b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/resources/plugin.properties @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -plugin.id=fs-fetcher +plugin.id=file-system-fetcher plugin.class=org.apache.tika.pipes.fetcher.fs.FileSystemFetcherPlugin plugin.version=3.0.0-SNAPSHOT plugin.provider=Local File System Fetcher diff --git a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java deleted file mode 100644 index 52ec0ecaab..0000000000 --- a/tika-pipes/tika-fetchers/tika-fetcher-fs/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.fetcher.fs.config; - -import org.apache.tika.pipes.fetcher.config.FetcherConfig; - -public class FileSystemFetcherConfig extends FetcherConfig { - - public static final String PLUGIN_ID = getPluginIdForFetcherConfig(FileSystemFetcherConfig.class); - - @Override - public String getPluginId() { - return PLUGIN_ID; - } - private String basePath; - private boolean extractFileSystemMetadata; - - public String getBasePath() { - return basePath; - } - - public FileSystemFetcherConfig setBasePath(String basePath) { - this.basePath = basePath; - return this; - } - - public boolean isExtractFileSystemMetadata() { - return extractFileSystemMetadata; - } - - public FileSystemFetcherConfig setExtractFileSystemMetadata(boolean extractFileSystemMetadata) { - this.extractFileSystemMetadata = extractFileSystemMetadata; - return this; - } -} diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml index 467f51d572..ab40f98b25 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml @@ -61,7 +61,7 @@ maven-assembly-plugin - ${project.basedir}/src/assembly/assembly.xml + src/main/assembly/assembly.xml false diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/assembly/assembly.xml similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-gcs/src/assembly/assembly.xml rename to 
tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/assembly/assembly.xml diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java index 49b94572cb..5f6cf4d815 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java @@ -20,7 +20,7 @@ public class GCSFetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = getPluginIdForFetcherConfig(GCSFetcherConfig.class); + public static final String PLUGIN_ID = "gcs-fetcher"; @Override public String getPluginId() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml index 59e6ff5c69..268aee9d36 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml @@ -78,7 +78,7 @@ maven-assembly-plugin - ${project.basedir}/src/assembly/assembly.xml + src/main/assembly/assembly.xml false diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/assembly/assembly.xml similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-http/src/assembly/assembly.xml rename to tika-pipes/tika-fetchers/tika-fetcher-http/src/main/assembly/assembly.xml diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java index 76fdc6ace5..0e43357da5 100644 --- 
a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java @@ -23,7 +23,7 @@ public class HttpFetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = getPluginIdForFetcherConfig(HttpFetcherConfig.class); + public static final String PLUGIN_ID = "http-fetcher"; @Override public String getPluginId() { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml index 135e0fc008..714813c917 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml @@ -145,7 +145,7 @@ maven-assembly-plugin - ${project.basedir}/src/assembly/assembly.xml + src/main/assembly/assembly.xml false diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/assembly/assembly.xml similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/assembly/assembly.xml rename to tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/assembly/assembly.xml diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java index 478790c682..1cb41f31c5 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java +++ 
b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java @@ -23,7 +23,7 @@ public class MicrosoftGraphFetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = getPluginIdForFetcherConfig(MicrosoftGraphFetcherConfig.class); + public static final String PLUGIN_ID = "microsoft-graph-fetcher"; @Override public String getPluginId() { return PLUGIN_ID; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml index e24aba28b5..ceae4f06f6 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml @@ -65,7 +65,7 @@ maven-assembly-plugin - ${project.basedir}/src/assembly/assembly.xml + src/main/assembly/assembly.xml false diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/assembly/assembly.xml similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-s3/src/assembly/assembly.xml rename to tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/assembly/assembly.xml diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java index 24bd37893f..110b4736bd 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java @@ -20,7 +20,7 @@ public class S3FetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = getPluginIdForFetcherConfig(S3FetcherConfig.class); + public static final String PLUGIN_ID = "s3-fetcher"; @Override public String getPluginId() { diff --git 
a/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml index 05d1290078..343d59e088 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml @@ -55,7 +55,7 @@ maven-assembly-plugin - ${project.basedir}/src/assembly/assembly.xml + src/main/assembly/assembly.xml false diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/assembly/assembly.xml b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/assembly/assembly.xml similarity index 100% rename from tika-pipes/tika-fetchers/tika-fetcher-url/src/assembly/assembly.xml rename to tika-pipes/tika-fetchers/tika-fetcher-url/src/main/assembly/assembly.xml diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java index c5d5531379..c4840ae833 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java @@ -20,7 +20,7 @@ public class UrlFetcherConfig extends FetcherConfig { - public static final String PLUGIN_ID = getPluginIdForFetcherConfig(UrlFetcherConfig.class); + public static final String PLUGIN_ID = "url-fetcher"; @Override public String getPluginId() { diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java index e7f1d210ef..77943bca17 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java @@ -108,7 +108,7 @@ public 
InputStream getInputStream(InputStream is, Metadata metadata, HttpHeaders Fetcher fetcher = fetcherManager.getFetcher(fetcherName); if (fetchRangeStart > -1 && fetchRangeEnd > -1 && !(fetcher instanceof RangeFetcher)) { throw new IllegalArgumentException( - "Can't call a fetch with a range on a fetcher that" + " is not a RangeFetcher: name=" + fetcher.getName() + " class=" + fetcher.getClass()); + "Can't call a fetch with a range on a fetcher that" + " is not a RangeFetcher: pluginId=" + fetcher.getPluginId() + " class=" + fetcher.getClass()); } return fetcher.fetch(fetchKey, metadata, parseContext); } catch (TikaException e) { From 82516fe61ba1a0ccc672cd5f8163a374a2cc0e48 Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Mon, 26 Aug 2024 10:17:58 -0500 Subject: [PATCH 09/11] TIKA-4272: publish the plugins too --- pom.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pom.xml b/pom.xml index 7a488f2d99..e64b1f047b 100644 --- a/pom.xml +++ b/pom.xml @@ -115,6 +115,13 @@ + + + + + + + From 664096315ab7321490eae2fea47966aedee3a773 Mon Sep 17 00:00:00 2001 From: Nicholas DiPiazza Date: Sun, 1 Sep 2024 22:23:20 -0500 Subject: [PATCH 10/11] TIKA-4272: move tika pipes code out of tika-core --- pom.xml | 1 - .../AbstractEmbeddedDocumentBytesHandler.java | 1 - .../BasicEmbeddedDocumentBytesHandler.java | 1 - .../EmbeddedDocumentBytesConfig.java | 2 +- tika-fuzzing/pom.xml | 5 + .../tika/pipes/grpc/ExpiringFetcherStore.java | 4 +- .../tika/pipes/grpc/TikaGrpcServer.java | 1 - .../tika/pipes/grpc/TikaGrpcServerImpl.java | 8 +- .../pipes/grpc/plugin/GrpcPluginManager.java | 4 +- .../pipes/grpc/ExpiringFetcherStoreTest.java | 2 +- tika-parent/pom.xml | 1 - tika-pipes/pom.xml | 2 + tika-pipes/tika-async-cli/pom.xml | 5 + .../tika/async/cli/AsyncProcessorTest.java | 2 +- tika-pipes/tika-emitters/pom.xml | 11 +- .../tika-emitters/tika-emitter-fs/pom.xml | 2 +- tika-pipes/tika-fetchers/pom.xml | 5 + .../azblob/config/AZBlobFetcherConfig.java | 2 +- 
.../fs/config/FileSystemFetcherConfig.java | 2 +- .../fetcher/gcs/config/GCSFetcherConfig.java | 2 +- .../http/config/HttpFetcherConfig.java | 2 +- .../config/MicrosoftGraphFetcherConfig.java | 2 +- .../fetcher/s3/config/S3FetcherConfig.java | 2 +- .../fetcher/url/config/UrlFetcherConfig.java | 2 +- tika-pipes/tika-pipes-core/pom.xml | 203 ++++++++++++++++++ .../tika/pipes/CompositePipesReporter.java | 0 .../pipes/FailedToStartClientException.java | 0 .../org/apache/tika/pipes/FetchEmitTuple.java | 2 +- .../org/apache/tika/pipes/HandlerConfig.java | 0 .../tika/pipes/LoggingPipesReporter.java | 0 .../org/apache/tika/pipes/PipesClient.java | 0 .../org/apache/tika/pipes/PipesConfig.java | 0 .../apache/tika/pipes/PipesConfigBase.java | 0 .../org/apache/tika/pipes/PipesException.java | 0 .../org/apache/tika/pipes/PipesParser.java | 0 .../org/apache/tika/pipes/PipesReporter.java | 0 .../apache/tika/pipes/PipesReporterBase.java | 0 .../org/apache/tika/pipes/PipesResult.java | 0 .../org/apache/tika/pipes/PipesServer.java | 2 +- .../apache/tika/pipes/async/AsyncConfig.java | 0 .../apache/tika/pipes/async/AsyncEmitter.java | 0 .../tika/pipes/async/AsyncProcessor.java | 0 .../apache/tika/pipes/async/AsyncStatus.java | 0 .../pipes/async/OfferLargerThanQueueSize.java | 0 .../tika/pipes/emitter/AbstractEmitter.java | 0 .../apache/tika/pipes/emitter/EmitData.java | 0 .../apache/tika/pipes/emitter/EmitKey.java | 0 .../apache/tika/pipes/emitter/Emitter.java | 0 .../tika/pipes/emitter/EmitterManager.java | 0 .../tika/pipes/emitter/EmptyEmitter.java | 0 .../tika/pipes/emitter/StreamEmitter.java | 0 .../pipes/emitter/TikaEmitterException.java | 0 .../EmittingEmbeddedDocumentBytesHandler.java | 1 + .../tika/pipes/fetcher/AbstractFetcher.java | 0 .../tika/pipes/fetcher/EmptyFetcher.java | 0 .../apache/tika/pipes/fetcher/FetchKey.java | 0 .../apache/tika/pipes/fetcher/Fetcher.java | 0 .../tika/pipes/fetcher/FetcherManager.java | 0 .../pipes/fetcher/FetcherStringException.java | 0 
.../tika/pipes/fetcher/RangeFetcher.java | 0 .../pipes/fetcher/config/FetcherConfig.java | 2 +- .../config/FetcherConfigContainer.java | 0 .../pipesiterator/CallablePipesIterator.java | 0 .../pipes/pipesiterator/PipesIterator.java | 0 .../pipes/pipesiterator/TotalCountResult.java | 0 .../pipes/pipesiterator/TotalCounter.java | 0 .../filelist/FileListPipesIterator.java | 0 .../fs/FileSystemPipesIterator.java | 0 .../pipes-fork-server-default-log4j2.xml | 0 .../apache/tika/pipes/PipesClientTest.java | 0 .../apache/tika/pipes/PipesServerTest.java | 2 +- .../pipes/async/AsyncChaosMonkeyTest.java | 0 .../tika/pipes/async/MockDigesterFactory.java | 0 .../apache/tika/pipes/async/MockEmitter.java | 0 .../apache/tika/pipes/async/MockFetcher.java | 0 .../apache/tika/pipes/async/MockReporter.java | 0 .../tika/pipes/async/MockReporterTest.java | 0 .../pipes}/config/TikaPipesConfigTest.java | 3 +- .../tika/pipes/emitter/MockEmitter.java | 0 .../tika/pipes/fetcher/MockFetcher.java | 0 .../FileSystemPipesIteratorTest.java | 0 .../filelist/FileListPipesIteratorTest.java | 0 .../src/test/resources/log4j2.xml | 32 +++ .../org/apache/tika/pipes/TIKA-3941.xml | 0 .../tika/pipes/TIKA-4207-limit-bytes.xml | 0 .../org/apache/tika/pipes/TIKA-4207.xml | 0 .../org/apache/tika/pipes/async/TIKA-3507.xml | 0 .../org/apache/tika/pipes/async/TIKA-3865.xml | 0 .../apache/tika/pipes/tika-sample-config.xml | 0 tika-pipes/tika-pipes-iterators/pom.xml | 8 + tika-pipes/tika-pipes-reporters/pom.xml | 8 + .../tika-serialization}/pom.xml | 10 +- .../tika/serialization/JsonMetadata.java | 0 .../tika/serialization/JsonMetadataList.java | 0 .../JsonStreamingSerializer.java | 0 .../ParseContextDeserializer.java | 0 .../serialization/ParseContextSerializer.java | 0 .../PrettyMetadataKeyComparator.java | 0 .../serialization/TikaJsonDeserializer.java | 0 .../serialization/TikaJsonSerializer.java | 0 .../TikaSerializationException.java | 0 .../serialization/pipes/JsonEmitData.java | 0 
.../pipes/JsonFetchEmitTuple.java | 0 .../pipes/JsonFetchEmitTupleList.java | 0 .../serialization/JsonMetadataListTest.java | 0 .../tika/serialization/JsonMetadataTest.java | 0 .../TestParseContextSerialization.java | 0 .../TikaJsonSerializationTest.java | 0 .../tika/serialization/mocks/ClassA.java | 0 .../tika/serialization/mocks/ClassB.java | 0 .../tika/serialization/mocks/ClassC.java | 0 .../pipes/JsonFetchEmitTupleListTest.java | 0 .../pipes/JsonFetchEmitTupleTest.java | 0 .../resources/config/tika-config-json.xml | 0 tika-server/tika-server-client/pom.xml | 7 +- tika-server/tika-server-core/pom.xml | 5 + .../server/core/resource/AsyncResource.java | 2 +- .../tika/server/standard/TikaPipesTest.java | 2 +- 118 files changed, 326 insertions(+), 34 deletions(-) rename tika-core/src/main/java/org/apache/tika/{pipes => }/extractor/EmbeddedDocumentBytesConfig.java (99%) create mode 100644 tika-pipes/tika-pipes-core/pom.xml rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/FailedToStartClientException.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java (98%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/HandlerConfig.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesClient.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesConfig.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesConfigBase.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesException.java (100%) rename {tika-core => 
tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesParser.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesReporter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesReporterBase.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesResult.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/PipesServer.java (99%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/async/AsyncEmitter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/async/AsyncStatus.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/async/OfferLargerThanQueueSize.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/EmitData.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/Emitter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/EmitterManager.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/EmptyEmitter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/StreamEmitter.java (100%) rename {tika-core => 
tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/emitter/TikaEmitterException.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java (98%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/FetchKey.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/FetcherStringException.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java (95%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfigContainer.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/pipesiterator/CallablePipesIterator.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCountResult.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCounter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java (100%) rename 
{tika-core => tika-pipes/tika-pipes-core}/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/main/resources/pipes-fork-server-default-log4j2.xml (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/PipesClientTest.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/PipesServerTest.java (99%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/async/AsyncChaosMonkeyTest.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/async/MockDigesterFactory.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/async/MockEmitter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/async/MockFetcher.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/async/MockReporter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java (100%) rename {tika-core/src/test/java/org/apache/tika => tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes}/config/TikaPipesConfigTest.java (97%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/emitter/MockEmitter.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/pipesiterator/FileSystemPipesIteratorTest.java (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIteratorTest.java (100%) create mode 100644 tika-pipes/tika-pipes-core/src/test/resources/log4j2.xml rename {tika-core => 
tika-pipes/tika-pipes-core}/src/test/resources/org/apache/tika/pipes/TIKA-3941.xml (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/resources/org/apache/tika/pipes/TIKA-4207-limit-bytes.xml (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/resources/org/apache/tika/pipes/TIKA-4207.xml (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml (100%) rename {tika-core => tika-pipes/tika-pipes-core}/src/test/resources/org/apache/tika/pipes/tika-sample-config.xml (100%) rename {tika-serialization => tika-pipes/tika-serialization}/pom.xml (93%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/JsonMetadata.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/JsonMetadataList.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/PrettyMetadataKeyComparator.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/TikaSerializationException.java 
(100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/pipes/JsonEmitData.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTuple.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleList.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/mocks/ClassA.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/mocks/ClassB.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/mocks/ClassC.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleListTest.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleTest.java (100%) rename {tika-serialization => tika-pipes/tika-serialization}/src/test/resources/config/tika-config-json.xml (100%) diff --git a/pom.xml b/pom.xml index e64b1f047b..590ab055a8 100644 --- a/pom.xml +++ b/pom.xml @@ -38,7 +38,6 @@ tika-parent tika-bom tika-core - tika-serialization tika-parsers tika-bundles tika-xmp diff 
--git a/tika-core/src/main/java/org/apache/tika/extractor/AbstractEmbeddedDocumentBytesHandler.java b/tika-core/src/main/java/org/apache/tika/extractor/AbstractEmbeddedDocumentBytesHandler.java index 3e009f6665..3d4942c1b1 100644 --- a/tika-core/src/main/java/org/apache/tika/extractor/AbstractEmbeddedDocumentBytesHandler.java +++ b/tika-core/src/main/java/org/apache/tika/extractor/AbstractEmbeddedDocumentBytesHandler.java @@ -25,7 +25,6 @@ import org.apache.tika.io.FilenameUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.utils.StringUtils; public abstract class AbstractEmbeddedDocumentBytesHandler implements EmbeddedDocumentBytesHandler { diff --git a/tika-core/src/main/java/org/apache/tika/extractor/BasicEmbeddedDocumentBytesHandler.java b/tika-core/src/main/java/org/apache/tika/extractor/BasicEmbeddedDocumentBytesHandler.java index cf6441b4fb..8ac983fd77 100644 --- a/tika-core/src/main/java/org/apache/tika/extractor/BasicEmbeddedDocumentBytesHandler.java +++ b/tika-core/src/main/java/org/apache/tika/extractor/BasicEmbeddedDocumentBytesHandler.java @@ -25,7 +25,6 @@ import org.apache.commons.io.input.UnsynchronizedBufferedInputStream; import org.apache.tika.metadata.Metadata; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; /** * For now, this is an in-memory EmbeddedDocumentBytesHandler that stores diff --git a/tika-core/src/main/java/org/apache/tika/pipes/extractor/EmbeddedDocumentBytesConfig.java b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentBytesConfig.java similarity index 99% rename from tika-core/src/main/java/org/apache/tika/pipes/extractor/EmbeddedDocumentBytesConfig.java rename to tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentBytesConfig.java index 542c1c8a30..df7980418d 100644 --- 
a/tika-core/src/main/java/org/apache/tika/pipes/extractor/EmbeddedDocumentBytesConfig.java +++ b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentBytesConfig.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.tika.pipes.extractor; +package org.apache.tika.extractor; import java.io.Serializable; import java.util.Objects; diff --git a/tika-fuzzing/pom.xml b/tika-fuzzing/pom.xml index fdf94626c2..023148c2c8 100644 --- a/tika-fuzzing/pom.xml +++ b/tika-fuzzing/pom.xml @@ -39,6 +39,11 @@ ${project.version} provided + + org.apache.tika + tika-pipes-core + ${project.version} + ${project.groupId} tika-serialization diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java index 35654a8643..52afe4aaa6 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/ExpiringFetcherStore.java @@ -76,7 +76,7 @@ public Map getFetcherConfigs() { * This method will get the fetcher, but will also log the access the fetcher as having * been accessed. This prevents the scheduled job from removing the stale fetcher. 
*/ - public C getFetcherAndLogAccess(String fetcherId) { + public C getFetcherConfigAndLogAccess(String fetcherId) { fetcherLastAccessed.put(fetcherId, Instant.now()); return (C) fetcherConfigs.get(fetcherId); } @@ -84,7 +84,7 @@ public C getFetcherAndLogAccess(String fetcherId) { public void createFetcher(String fetcherId, C config) { config.setFetcherId(fetcherId); fetcherConfigs.put(fetcherId, config); - getFetcherAndLogAccess(fetcherId); + getFetcherConfigAndLogAccess(fetcherId); } @Override diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java index d08754eedc..0d6a2b819c 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java @@ -106,7 +106,6 @@ public void start() throws Exception { } pluginManager = pluginDirs == null ? new GrpcPluginManager() : new GrpcPluginManager(pluginDirs); pluginManager.loadPlugins(); - LOGGER.info("Loaded {} plugins", pluginManager.getPlugins().size()); pluginManager.startPlugins(); File tikaConfigFile = new File(tikaConfigXml.getAbsolutePath()); healthStatusManager.setStatus(TikaGrpcServer.class.getSimpleName(), ServingStatus.SERVING); diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java index 46e74d71aa..e83b4ede26 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java @@ -139,7 +139,7 @@ private void updateTikaConfig() fetchersElement.removeChild(fetchersElement.getChildNodes().item(i)); } for (var fetcherConfigEntry : expiringFetcherStore.getFetcherConfigs().entrySet()) { - Fetcher fetcherObject = getFetcher(fetcherConfigEntry.getValue().getPluginId()); + Fetcher fetcherObject = 
getFetcher(fetcherConfigEntry.getValue().getFetcherPluginId()); Map fetcherConfigParams = OBJECT_MAPPER.convertValue( expiringFetcherStore.getFetcherConfigs().get(fetcherConfigEntry.getKey()), new TypeReference<>() { @@ -218,7 +218,7 @@ public void fetchAndParse(FetchAndParseRequest request, private void fetchAndParseImpl(FetchAndParseRequest request, StreamObserver responseObserver) { FetcherConfig fetcherConfig = - expiringFetcherStore.getFetcherAndLogAccess(request.getFetcherId()); + expiringFetcherStore.getFetcherConfigAndLogAccess(request.getFetcherId()); if (fetcherConfig == null) { throw new TikaGrpcException( "Could not find fetcher with name " + request.getFetcherId()); @@ -339,7 +339,7 @@ public void getFetcher(GetFetcherRequest request, return; } getFetcherReply.setFetcherId(request.getFetcherId()); - getFetcherReply.setPluginId(fetcherConfig.getPluginId()); + getFetcherReply.setPluginId(fetcherConfig.getFetcherPluginId()); Map paramMap = OBJECT_MAPPER.convertValue(fetcherConfig, new TypeReference<>() {}); paramMap.forEach( (k, v) -> getFetcherReply.putParams(Objects.toString(k), Objects.toString(v))); @@ -364,7 +364,7 @@ private GetFetcherReply.Builder saveFetcherReply( Map.Entry fetcherConfigEntry) { FetcherConfig fetcherConfig = fetcherConfigEntry.getValue(); GetFetcherReply.Builder replyBuilder = - GetFetcherReply.newBuilder().setPluginId(fetcherConfig.getPluginId()) + GetFetcherReply.newBuilder().setPluginId(fetcherConfig.getFetcherPluginId()) .setFetcherId(fetcherConfig.getFetcherId()); loadParamsIntoReply(fetcherConfig, replyBuilder); return replyBuilder; diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java index 53faa96b09..0e35219772 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java @@ -70,7 
+70,9 @@ public void startPlugins() { private void checkFetcherExtensions(PluginWrapper plugin) { for (Class extensionClass : getExtensionClasses(Fetcher.class, plugin.getPluginId())) { if (!Fetcher.class.isAssignableFrom(extensionClass)) { - throw new TikaGrpcException("Something is wrong with the classpath. " + Fetcher.class.getName() + " should be assignable from " + extensionClass.getName() + ". Did tika-core accidentally get in your plugin lib?"); + throw new TikaGrpcException("Something is wrong with the classpath. " + Fetcher.class.getName() + + " should be assignable from " + extensionClass.getName() + + ". Did tika-core accidentally get in your plugin lib?"); } LOGGER.info(" Extension " + extensionClass + " has been registered to plugin " + plugin.getPluginId()); } diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java index 5bafa67922..2ae636478a 100644 --- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java +++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/ExpiringFetcherStoreTest.java @@ -68,7 +68,7 @@ public InputStream fetch(String fetchKey, Metadata metadata, ParseContext parseC fetcher.setPluginId("nicksPlugin"); return new FetcherConfig() { @Override - public String getPluginId() { + public String getFetcherPluginId() { return fetcher.getPluginId(); } }; diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml index 5d67056060..e16d3a2a2e 100644 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@ -1044,7 +1044,6 @@ pf4j ${pf4j.version} - diff --git a/tika-pipes/pom.xml b/tika-pipes/pom.xml index d3bce6a4e7..d0a06dd1f3 100644 --- a/tika-pipes/pom.xml +++ b/tika-pipes/pom.xml @@ -30,12 +30,14 @@ pom + tika-pipes-core tika-httpclient-commons tika-fetchers tika-emitters tika-pipes-iterators tika-pipes-reporters tika-async-cli + tika-serialization diff --git a/tika-pipes/tika-async-cli/pom.xml 
b/tika-pipes/tika-async-cli/pom.xml index 9cccbe9a2a..6d64b03ee9 100644 --- a/tika-pipes/tika-async-cli/pom.xml +++ b/tika-pipes/tika-async-cli/pom.xml @@ -44,6 +44,11 @@ test-jar test + + org.apache.tika + tika-pipes-core + ${project.version} + org.apache.logging.log4j diff --git a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java index acadeeb7af..9b6a53933a 100644 --- a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java +++ b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java @@ -33,6 +33,7 @@ import org.junit.jupiter.api.io.TempDir; import org.apache.tika.TikaTest; +import org.apache.tika.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; @@ -40,7 +41,6 @@ import org.apache.tika.pipes.HandlerConfig; import org.apache.tika.pipes.async.AsyncProcessor; import org.apache.tika.pipes.emitter.EmitKey; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.fetcher.FetchKey; import org.apache.tika.pipes.pipesiterator.PipesIterator; import org.apache.tika.serialization.JsonMetadataList; diff --git a/tika-pipes/tika-emitters/pom.xml b/tika-pipes/tika-emitters/pom.xml index 6ae038c7a9..afa9d50a87 100644 --- a/tika-pipes/tika-emitters/pom.xml +++ b/tika-pipes/tika-emitters/pom.xml @@ -42,7 +42,16 @@ tika-emitter-jdbc + + + ${project.groupId} + tika-pipes-core + ${project.version} + provided + + + 3.0.0-BETA2-rc1 - \ No newline at end of file + diff --git a/tika-pipes/tika-emitters/tika-emitter-fs/pom.xml b/tika-pipes/tika-emitters/tika-emitter-fs/pom.xml index f34850a2a9..801ffaba01 100644 --- a/tika-pipes/tika-emitters/tika-emitter-fs/pom.xml +++ b/tika-pipes/tika-emitters/tika-emitter-fs/pom.xml @@ 
-113,4 +113,4 @@ 3.0.0-BETA2-rc1 - \ No newline at end of file + diff --git a/tika-pipes/tika-fetchers/pom.xml b/tika-pipes/tika-fetchers/pom.xml index 5e271073b8..b4637e0c20 100644 --- a/tika-pipes/tika-fetchers/pom.xml +++ b/tika-pipes/tika-fetchers/pom.xml @@ -49,6 +49,11 @@ provided + + org.apache.tika + tika-pipes-core + ${project.version} + org.apache.logging.log4j log4j-core diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java index 6113891247..5dc091ff23 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/config/AZBlobFetcherConfig.java @@ -23,7 +23,7 @@ public class AZBlobFetcherConfig extends FetcherConfig { public static final String PLUGIN_ID = "az-blob-fetcher"; @Override - public String getPluginId() { + public String getFetcherPluginId() { return PLUGIN_ID; } diff --git a/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java index ded6a9520c..10475d68e3 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/config/FileSystemFetcherConfig.java @@ -23,7 +23,7 @@ public class FileSystemFetcherConfig extends FetcherConfig { public static final String PLUGIN_ID = "file-system-fetcher"; @Override - public String getPluginId() { + public String 
getFetcherPluginId() { return PLUGIN_ID; } private String basePath; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java index 5f6cf4d815..bf934ae95d 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/config/GCSFetcherConfig.java @@ -23,7 +23,7 @@ public class GCSFetcherConfig extends FetcherConfig { public static final String PLUGIN_ID = "gcs-fetcher"; @Override - public String getPluginId() { + public String getFetcherPluginId() { return PLUGIN_ID; } private boolean spoolToTemp; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java index 0e43357da5..62446f4954 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/config/HttpFetcherConfig.java @@ -26,7 +26,7 @@ public class HttpFetcherConfig extends FetcherConfig { public static final String PLUGIN_ID = "http-fetcher"; @Override - public String getPluginId() { + public String getFetcherPluginId() { return PLUGIN_ID; } private String userName; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java index 
1cb41f31c5..68981090e6 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/src/main/java/org/apache/tika/pipes/fetchers/microsoftgraph/config/MicrosoftGraphFetcherConfig.java @@ -25,7 +25,7 @@ public class MicrosoftGraphFetcherConfig extends FetcherConfig { public static final String PLUGIN_ID = "microsoft-graph-fetcher"; @Override - public String getPluginId() { + public String getFetcherPluginId() { return PLUGIN_ID; } private long[] throttleSeconds; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java index 110b4736bd..33918c2602 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/config/S3FetcherConfig.java @@ -23,7 +23,7 @@ public class S3FetcherConfig extends FetcherConfig { public static final String PLUGIN_ID = "s3-fetcher"; @Override - public String getPluginId() { + public String getFetcherPluginId() { return PLUGIN_ID; } private boolean spoolToTemp; diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java index c4840ae833..e3f250a97f 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/src/main/java/org/apache/tika/pipes/fetcher/url/config/UrlFetcherConfig.java @@ -23,7 +23,7 @@ public class UrlFetcherConfig 
extends FetcherConfig { public static final String PLUGIN_ID = "url-fetcher"; @Override - public String getPluginId() { + public String getFetcherPluginId() { return PLUGIN_ID; } } diff --git a/tika-pipes/tika-pipes-core/pom.xml b/tika-pipes/tika-pipes-core/pom.xml new file mode 100644 index 0000000000..84339a2eb1 --- /dev/null +++ b/tika-pipes/tika-pipes-core/pom.xml @@ -0,0 +1,203 @@ + + + + + + 4.0.0 + + + org.apache.tika + tika-pipes + 3.0.0-SNAPSHOT + ../pom.xml + + + tika-pipes-core + jar + Apache Tika Pipes core + https://tika.apache.org/ + + + + org.slf4j + slf4j-api + + + org.apache.tika + tika-core + ${project.version} + + + org.apache.tika + tika-core + ${project.version} + test-jar + test + + + org.pf4j + pf4j + + provided + + + commons-io + commons-io + + + + + + com.google.guava + guava + test + + + com.martensigwart + fakeload + ${fakeload.version} + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + ${checkstyle.plugin.version} + + + com.puppycrawl.tools + checkstyle + ${puppycrawl.version} + + + + + validate + validate + + checkstyle.xml + UTF-8 + false + true + ${project.basedir}/src/test/java + error + true + + + check + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.tika.pipes.core + + + + + + + test-jar + + + + + + maven-failsafe-plugin + ${maven.failsafe.version} + + + + ${project.build.directory}/${project.build.finalName}.jar + + + + + + + integration-test + verify + + + + + + + + + + + org.codehaus.mojo + findbugs-maven-plugin + 3.0.5 + + -Xmx256m + 240000 + max + true + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + ${maven.project.info.reports.version} + + + + index + + + + + + + + This is the core Apache Tika™ toolkit library for Tika Pipes.
+ + The Apache Software Foundation + http://www.apache.org + + + JIRA + https://issues.apache.org/jira/browse/TIKA + + + Jenkins + https://builds.apache.org/job/Tika-trunk/ + + + + 3.0.0-BETA2-rc1 + + diff --git a/tika-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/FailedToStartClientException.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/FailedToStartClientException.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/FailedToStartClientException.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/FailedToStartClientException.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java similarity index 98% rename from tika-core/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java index a0f40901ba..07b5a4c69c 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java @@ -19,10 +19,10 @@ import java.io.Serializable; import java.util.Objects; +import org.apache.tika.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.emitter.EmitKey; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.fetcher.FetchKey; public class FetchEmitTuple implements Serializable { 
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/HandlerConfig.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesClient.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesClient.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfig.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesConfig.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfig.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesException.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesException.java similarity index 100% rename from 
tika-core/src/main/java/org/apache/tika/pipes/PipesException.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesException.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesParser.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesParser.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesParser.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesParser.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesReporter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesReporter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesReporterBase.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporterBase.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesReporterBase.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporterBase.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesResult.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesResult.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesResult.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesResult.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesServer.java similarity index 99% rename from tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesServer.java index dffb7c9ce2..1c0c204c78 100644 --- 
a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesServer.java @@ -48,6 +48,7 @@ import org.apache.tika.extractor.BasicEmbeddedDocumentBytesHandler; import org.apache.tika.extractor.DocumentSelector; import org.apache.tika.extractor.EmbeddedDocumentByteStoreExtractorFactory; +import org.apache.tika.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.extractor.EmbeddedDocumentBytesHandler; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory; @@ -70,7 +71,6 @@ import org.apache.tika.pipes.emitter.EmitterManager; import org.apache.tika.pipes.emitter.StreamEmitter; import org.apache.tika.pipes.emitter.TikaEmitterException; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.extractor.EmittingEmbeddedDocumentBytesHandler; import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; diff --git a/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncEmitter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncEmitter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/async/AsyncEmitter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncEmitter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java similarity index 100% rename from 
tika-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncStatus.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncStatus.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/async/AsyncStatus.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncStatus.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/async/OfferLargerThanQueueSize.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/OfferLargerThanQueueSize.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/async/OfferLargerThanQueueSize.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/OfferLargerThanQueueSize.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitData.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmitData.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitData.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmitData.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java rename to 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmitKey.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitterManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmitterManager.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitterManager.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmitterManager.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/EmptyEmitter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmptyEmitter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/EmptyEmitter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/EmptyEmitter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/StreamEmitter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/StreamEmitter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/StreamEmitter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/StreamEmitter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/emitter/TikaEmitterException.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/TikaEmitterException.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/emitter/TikaEmitterException.java rename to 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/TikaEmitterException.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java similarity index 98% rename from tika-core/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java index 07c9f7507f..7577da8879 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java @@ -22,6 +22,7 @@ import org.apache.tika.exception.TikaConfigException; import org.apache.tika.extractor.AbstractEmbeddedDocumentBytesHandler; +import org.apache.tika.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.FetchEmitTuple; diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java diff --git 
a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetchKey.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetchKey.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetchKey.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetchKey.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherStringException.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherStringException.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherStringException.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherStringException.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/RangeFetcher.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java similarity index 95% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java index e9df451891..c029f3d6f1 100644 --- a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java @@ -19,7 +19,7 @@ public abstract class FetcherConfig { private String fetcherId; - abstract public String getPluginId(); + abstract public String getFetcherPluginId(); public String getFetcherId() { return fetcherId; diff --git a/tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfigContainer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfigContainer.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfigContainer.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfigContainer.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/CallablePipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/CallablePipesIterator.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/CallablePipesIterator.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/CallablePipesIterator.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java rename to 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCountResult.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCountResult.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCountResult.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCountResult.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCounter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCounter.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCounter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/TotalCounter.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java diff --git a/tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java similarity index 100% rename from tika-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java diff --git a/tika-core/src/main/resources/pipes-fork-server-default-log4j2.xml 
b/tika-pipes/tika-pipes-core/src/main/resources/pipes-fork-server-default-log4j2.xml similarity index 100% rename from tika-core/src/main/resources/pipes-fork-server-default-log4j2.xml rename to tika-pipes/tika-pipes-core/src/main/resources/pipes-fork-server-default-log4j2.xml diff --git a/tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesClientTest.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java similarity index 99% rename from tika-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java index ff80bb9160..579f298d64 100644 --- a/tika-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java +++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java @@ -32,10 +32,10 @@ import org.apache.tika.TikaTest; import org.apache.tika.extractor.BasicEmbeddedDocumentBytesHandler; +import org.apache.tika.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.emitter.EmitKey; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.fetcher.FetchKey; import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/AsyncChaosMonkeyTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/AsyncChaosMonkeyTest.java similarity index 100% rename from 
tika-core/src/test/java/org/apache/tika/pipes/async/AsyncChaosMonkeyTest.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/AsyncChaosMonkeyTest.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockDigesterFactory.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockDigesterFactory.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/async/MockDigesterFactory.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockDigesterFactory.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockEmitter.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockEmitter.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/async/MockEmitter.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockEmitter.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockFetcher.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java rename to 
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/config/TikaPipesConfigTest.java similarity index 97% rename from tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/config/TikaPipesConfigTest.java index 0f1ab2c906..3f1a2e584c 100644 --- a/tika-core/src/test/java/org/apache/tika/config/TikaPipesConfigTest.java +++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/config/TikaPipesConfigTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.tika.config; +package org.apache.tika.pipes.config; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -26,6 +26,7 @@ import org.junit.jupiter.api.Test; +import org.apache.tika.config.AbstractTikaConfigTest; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.CompositePipesReporter; import org.apache.tika.pipes.PipesReporter; diff --git a/tika-core/src/test/java/org/apache/tika/pipes/emitter/MockEmitter.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/emitter/MockEmitter.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/emitter/MockEmitter.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/emitter/MockEmitter.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java rename to 
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/fetcher/MockFetcher.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/pipesiterator/FileSystemPipesIteratorTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/pipesiterator/FileSystemPipesIteratorTest.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/pipesiterator/FileSystemPipesIteratorTest.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/pipesiterator/FileSystemPipesIteratorTest.java diff --git a/tika-core/src/test/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIteratorTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIteratorTest.java similarity index 100% rename from tika-core/src/test/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIteratorTest.java rename to tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIteratorTest.java diff --git a/tika-pipes/tika-pipes-core/src/test/resources/log4j2.xml b/tika-pipes/tika-pipes-core/src/test/resources/log4j2.xml new file mode 100644 index 0000000000..5f946e6e5c --- /dev/null +++ b/tika-pipes/tika-pipes-core/src/test/resources/log4j2.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/TIKA-3941.xml b/tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/TIKA-3941.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/pipes/TIKA-3941.xml rename to tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/TIKA-3941.xml diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4207-limit-bytes.xml b/tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/TIKA-4207-limit-bytes.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4207-limit-bytes.xml 
rename to tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/TIKA-4207-limit-bytes.xml diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4207.xml b/tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/TIKA-4207.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4207.xml rename to tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/TIKA-4207.xml diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml b/tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml rename to tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml b/tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml rename to tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3865.xml diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/tika-sample-config.xml b/tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/tika-sample-config.xml similarity index 100% rename from tika-core/src/test/resources/org/apache/tika/pipes/tika-sample-config.xml rename to tika-pipes/tika-pipes-core/src/test/resources/org/apache/tika/pipes/tika-sample-config.xml diff --git a/tika-pipes/tika-pipes-iterators/pom.xml b/tika-pipes/tika-pipes-iterators/pom.xml index 2106a1b045..5e85241b5c 100644 --- a/tika-pipes/tika-pipes-iterators/pom.xml +++ b/tika-pipes/tika-pipes-iterators/pom.xml @@ -44,6 +44,14 @@ tika-pipes-iterator-az-blob + + + org.apache.tika + tika-pipes-core + ${project.version} + + + 3.0.0-BETA2-rc1 diff --git 
a/tika-pipes/tika-pipes-reporters/pom.xml b/tika-pipes/tika-pipes-reporters/pom.xml index 13ea50a4db..9d0e7d2e94 100644 --- a/tika-pipes/tika-pipes-reporters/pom.xml +++ b/tika-pipes/tika-pipes-reporters/pom.xml @@ -37,6 +37,14 @@ tika-pipes-reporter-jdbc + + + org.apache.tika + tika-pipes-core + ${project.version} + + + 3.0.0-BETA2-rc1 diff --git a/tika-serialization/pom.xml b/tika-pipes/tika-serialization/pom.xml similarity index 93% rename from tika-serialization/pom.xml rename to tika-pipes/tika-serialization/pom.xml index bfc12cb12f..de6e9f51d2 100644 --- a/tika-serialization/pom.xml +++ b/tika-pipes/tika-serialization/pom.xml @@ -24,9 +24,9 @@ org.apache.tika - tika-parent + tika-pipes 3.0.0-SNAPSHOT - ../tika-parent/pom.xml + ../pom.xml tika-serialization @@ -47,6 +47,12 @@ ${project.version} provided + + ${project.groupId} + tika-pipes-core + ${project.version} + provided + com.fasterxml.jackson.core jackson-core diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java similarity index 100% rename 
from tika-serialization/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/JsonStreamingSerializer.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/PrettyMetadataKeyComparator.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/PrettyMetadataKeyComparator.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/PrettyMetadataKeyComparator.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/PrettyMetadataKeyComparator.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java rename to 
tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaSerializationException.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/TikaSerializationException.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/TikaSerializationException.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/TikaSerializationException.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonEmitData.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonEmitData.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonEmitData.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonEmitData.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTuple.java b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTuple.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTuple.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTuple.java diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleList.java 
b/tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleList.java similarity index 100% rename from tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleList.java rename to tika-pipes/tika-serialization/src/main/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleList.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataListTest.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/JsonMetadataTest.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java similarity index 100% rename from 
tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleListTest.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleListTest.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleListTest.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleListTest.java diff --git 
a/tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleTest.java b/tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleTest.java similarity index 100% rename from tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleTest.java rename to tika-pipes/tika-serialization/src/test/java/org/apache/tika/serialization/pipes/JsonFetchEmitTupleTest.java diff --git a/tika-serialization/src/test/resources/config/tika-config-json.xml b/tika-pipes/tika-serialization/src/test/resources/config/tika-config-json.xml similarity index 100% rename from tika-serialization/src/test/resources/config/tika-config-json.xml rename to tika-pipes/tika-serialization/src/test/resources/config/tika-config-json.xml diff --git a/tika-server/tika-server-client/pom.xml b/tika-server/tika-server-client/pom.xml index 2bc9d5fb02..9963d8fa6b 100644 --- a/tika-server/tika-server-client/pom.xml +++ b/tika-server/tika-server-client/pom.xml @@ -32,6 +32,11 @@ tika-core ${project.version} + + org.apache.tika + tika-pipes-core + ${project.version} + ${project.groupId} tika-serialization @@ -132,4 +137,4 @@ 3.0.0-BETA2-rc1 - \ No newline at end of file + diff --git a/tika-server/tika-server-core/pom.xml b/tika-server/tika-server-core/pom.xml index 1f4b63ad45..63a3399f42 100644 --- a/tika-server/tika-server-core/pom.xml +++ b/tika-server/tika-server-core/pom.xml @@ -41,6 +41,11 @@ tika-core ${project.version} + + org.apache.tika + tika-pipes-core + ${project.version} + ${project.groupId} tika-translate diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java index 79107476eb..84e485c510 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java +++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java @@ -40,6 +40,7 @@ import org.xml.sax.SAXException; import org.apache.tika.exception.TikaException; +import org.apache.tika.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; @@ -48,7 +49,6 @@ import org.apache.tika.pipes.async.OfferLargerThanQueueSize; import org.apache.tika.pipes.emitter.EmitData; import org.apache.tika.pipes.emitter.EmitterManager; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.fetcher.FetchKey; import org.apache.tika.serialization.pipes.JsonFetchEmitTupleList; diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java index 36dc60a3c9..5ddca0492f 100644 --- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java +++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java @@ -49,6 +49,7 @@ import org.junit.jupiter.api.io.TempDir; import org.apache.tika.exception.TikaConfigException; +import org.apache.tika.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; @@ -56,7 +57,6 @@ import org.apache.tika.pipes.FetchEmitTuple; import org.apache.tika.pipes.HandlerConfig; import org.apache.tika.pipes.emitter.EmitKey; -import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig; import org.apache.tika.pipes.fetcher.FetchKey; import org.apache.tika.pipes.fetcher.FetcherManager; import org.apache.tika.sax.BasicContentHandlerFactory; From 08d6387b86bf92567b1e0b8f101fd386993c2476 Mon Sep 17 00:00:00 2001 From: Nicholas 
DiPiazza Date: Fri, 6 Sep 2024 07:13:33 -0500 Subject: [PATCH 11/11] TIKA-4272: start moving the plugin manager stuff into the pipes server --- .../apache/tika/fuzzing/cli/FuzzingCLI.java | 2 +- .../tika/pipes/grpc/TikaGrpcServer.java | 8 +- .../tika/pipes/grpc/TikaGrpcServerImpl.java | 258 ++++++++---------- .../tika/pipes/grpc/TikaGrpcServerTest.java | 23 +- .../pipes/s3/tests/PipeIntegrationTests.java | 20 +- .../tika-fetcher-az-blob/pom.xml | 2 +- .../fetcher/azblob/TestAZBlobFetcher.java | 19 +- .../tika-fetcher-file-system/pom.xml | 2 +- .../tika-fetchers/tika-fetcher-gcs/pom.xml | 2 +- .../tika/pipes/fetcher/s3/TestGCSFetcher.java | 16 +- .../tika-fetchers/tika-fetcher-http/pom.xml | 2 +- .../pipes/fetcher/http/HttpFetcherTest.java | 16 +- .../tika-fetcher-microsoft-graph/pom.xml | 2 +- .../tika-fetchers/tika-fetcher-s3/pom.xml | 2 +- .../tika/pipes/fetcher/s3/TestS3Fetcher.java | 16 +- .../tika-fetchers/tika-fetcher-url/pom.xml | 2 +- .../tika/pipes/CompositePipesReporter.java | 1 + .../org/apache/tika/pipes/PipesClient.java | 4 + .../org/apache/tika/pipes/PipesConfig.java | 6 + .../apache/tika/pipes/PipesConfigBase.java | 9 + .../org/apache/tika/pipes/PipesServer.java | 25 +- .../apache/tika/pipes/async/AsyncConfig.java | 2 +- .../tika/pipes/async/AsyncProcessor.java | 2 +- .../apache/tika/pipes/emitter/Emitter.java | 4 +- .../exception/PipesRuntimeException.java | 22 ++ .../tika/pipes/fetcher/FetcherManager.java | 75 ++--- .../pipes/fetcher/config/FetcherConfig.java | 4 + .../pipes/pipesiterator/PipesIterator.java | 7 +- .../filelist/FileListPipesIterator.java | 2 +- .../fs/FileSystemPipesIterator.java | 2 +- .../pipesiterator/fs/IPipesIterator.java | 26 ++ .../ClasspathPluginPropertiesFinder.java | 2 +- .../tika/pipes/plugin/TikaPluginManager.java | 16 +- .../pipes/{ => reporter}/PipesReporter.java | 4 +- .../{ => reporter}/PipesReporterBase.java | 3 +- .../logging}/LoggingPipesReporter.java | 6 +- .../apache/tika/pipes/PipesServerTest.java | 46 +++- 
.../apache/tika/pipes/async/MockReporter.java | 2 +- .../tika/pipes/async/MockReporterTest.java | 2 +- .../pipes/config/TikaPipesConfigTest.java | 28 +- tika-pipes/tika-pipes-iterators/pom.xml | 6 + .../azblob/AZBlobPipesIterator.java | 2 +- .../pipesiterator/csv/CSVPipesIterator.java | 2 +- .../pipesiterator/gcs/GCSPipesIterator.java | 2 +- .../pipesiterator/jdbc/JDBCPipesIterator.java | 2 +- .../pipesiterator/json/JsonPipesIterator.java | 2 +- .../kafka/KafkaPipesIterator.java | 2 +- .../pipesiterator/s3/S3PipesIterator.java | 2 +- .../pipesiterator/solr/SolrPipesIterator.java | 2 +- .../fs/FileSystemStatusReporter.java | 2 +- .../fs/TestFileSystemStatusReporter.java | 2 +- .../reporters/jdbc/JDBCPipesReporter.java | 2 +- .../reporters/jdbc/TestJDBCPipesReporter.java | 2 +- .../opensearch/OpenSearchPipesReporter.java | 2 +- .../tika/server/core/TikaServerProcess.java | 2 +- .../server/core/TikaResourceFetcherTest.java | 13 +- 56 files changed, 414 insertions(+), 325 deletions(-) create mode 100644 tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/exception/PipesRuntimeException.java create mode 100644 tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/IPipesIterator.java rename {tika-grpc/src/main/java/org/apache/tika/pipes/grpc => tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes}/plugin/ClasspathPluginPropertiesFinder.java (97%) rename tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java => tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/plugin/TikaPluginManager.java (84%) rename tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/{ => reporter}/PipesReporter.java (95%) rename tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/{ => reporter}/PipesReporterBase.java (98%) rename tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/{ => reporter/logging}/LoggingPipesReporter.java (88%) diff --git 
a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java index fb38c20f80..52834ac4aa 100644 --- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java +++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java @@ -77,7 +77,7 @@ private void execute(FuzzingCLIConfig config) throws Exception { ArrayBlockingQueue q = new ArrayBlockingQueue(10000); PipesConfig pipesConfig = PipesConfig.load(config.getTikaConfig()); - FetcherManager fetcherManager = FetcherManager.load(config.getTikaConfig()); + FetcherManager fetcherManager = new FetcherManager(); int totalThreads = pipesConfig.getNumClients() + 1; diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java index 0d6a2b819c..5779445f5a 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java @@ -34,13 +34,11 @@ import io.grpc.TlsServerCredentials; import io.grpc.protobuf.services.HealthStatusManager; import io.grpc.protobuf.services.ProtoReflectionService; -import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.tika.config.TikaConfig; import org.apache.tika.config.TikaConfigSerializer; -import org.apache.tika.pipes.grpc.plugin.GrpcPluginManager; /** * Server that manages startup/shutdown of the GRPC Tika server. 
@@ -50,7 +48,6 @@ public class TikaGrpcServer { public static final int TIKA_SERVER_GRPC_DEFAULT_PORT = 50052; private Server server; // create the plugin manager - private PluginManager pluginManager; @Parameter(names = {"-p", "--port"}, description = "The grpc server port", help = true) private Integer port = TIKA_SERVER_GRPC_DEFAULT_PORT; @@ -104,14 +101,11 @@ public void start() throws Exception { TikaConfigSerializer.serialize(new TikaConfig(), TikaConfigSerializer.Mode.STATIC_FULL, fw, StandardCharsets.UTF_8); } } - pluginManager = pluginDirs == null ? new GrpcPluginManager() : new GrpcPluginManager(pluginDirs); - pluginManager.loadPlugins(); - pluginManager.startPlugins(); File tikaConfigFile = new File(tikaConfigXml.getAbsolutePath()); healthStatusManager.setStatus(TikaGrpcServer.class.getSimpleName(), ServingStatus.SERVING); server = Grpc .newServerBuilderForPort(port, creds) - .addService(new TikaGrpcServerImpl(tikaConfigFile.getAbsolutePath(), pluginManager)) + .addService(new TikaGrpcServerImpl(tikaConfigFile.getAbsolutePath(), pluginDirs)) .addService(healthStatusManager.getHealthService()) .addService(ProtoReflectionService.newInstance()) .build() diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java index e83b4ede26..424bac77c2 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java +++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java @@ -17,38 +17,27 @@ package org.apache.tika.pipes.grpc; import java.io.File; -import java.io.FileWriter; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import 
javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.module.jsonSchema.JsonSchema; import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator; import com.google.rpc.Status; import io.grpc.protobuf.StatusProto; import io.grpc.stub.StreamObserver; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; -import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; import org.xml.sax.SAXException; import org.apache.tika.DeleteFetcherReply; @@ -66,7 +55,6 @@ import org.apache.tika.SaveFetcherReply; import org.apache.tika.SaveFetcherRequest; import org.apache.tika.TikaGrpc; -import org.apache.tika.config.Initializable; import org.apache.tika.config.Param; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.metadata.Metadata; @@ -77,7 +65,6 @@ import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.emitter.EmitKey; import org.apache.tika.pipes.fetcher.FetchKey; -import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.config.FetcherConfig; import org.apache.tika.pipes.fetcher.config.FetcherConfigContainer; import org.apache.tika.pipes.grpc.exception.TikaGrpcException; @@ -90,8 +77,6 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { } public static final JsonSchemaGenerator JSON_SCHEMA_GENERATOR = new JsonSchemaGenerator(OBJECT_MAPPER); - private final PluginManager pluginManager; - /** * FetcherID is key, The pair is the Fetcher object and the Metadata */ @@ -99,9 
+84,10 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { PipesClient pipesClient; ExpiringFetcherStore expiringFetcherStore; + String tikaConfigPath; - TikaGrpcServerImpl(String tikaConfigPath, PluginManager pluginManager) throws TikaConfigException, IOException, + TikaGrpcServerImpl(String tikaConfigPath, List pluginDirs) throws TikaConfigException, IOException, ParserConfigurationException, TransformerException, SAXException { File tikaConfigFile = new File(tikaConfigPath); if (!tikaConfigFile.canWrite()) { @@ -113,72 +99,72 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase { tikaConfigFile = tmpTikaConfigFile; tikaConfigPath = tikaConfigFile.getAbsolutePath(); } - pipesConfig = PipesConfig.load(tikaConfigFile.toPath()); + pipesConfig = PipesConfig.load(tikaConfigFile.toPath(), pluginDirs); pipesClient = new PipesClient(pipesConfig); expiringFetcherStore = new ExpiringFetcherStore(pipesConfig.getStaleFetcherTimeoutSeconds(), pipesConfig.getStaleFetcherDelaySeconds()); this.tikaConfigPath = tikaConfigPath; - updateTikaConfig(); - - this.pluginManager = pluginManager; - } - - private void updateTikaConfig() - throws ParserConfigurationException, IOException, SAXException, TransformerException { - Document tikaConfigDoc = - DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(tikaConfigPath); - - Element fetchersElement = (Element) tikaConfigDoc.getElementsByTagName("fetchers").item(0); - if (fetchersElement == null) { - fetchersElement = tikaConfigDoc.createElement("fetchers"); - tikaConfigDoc.getDocumentElement().appendChild(fetchersElement); - } - for (int i = 0; i < fetchersElement.getChildNodes().getLength(); ++i) { - fetchersElement.removeChild(fetchersElement.getChildNodes().item(i)); - } - for (var fetcherConfigEntry : expiringFetcherStore.getFetcherConfigs().entrySet()) { - Fetcher fetcherObject = getFetcher(fetcherConfigEntry.getValue().getFetcherPluginId()); - Map fetcherConfigParams = OBJECT_MAPPER.convertValue( - 
expiringFetcherStore.getFetcherConfigs().get(fetcherConfigEntry.getKey()), - new TypeReference<>() { - }); - Element fetcher = tikaConfigDoc.createElement("fetcher"); - fetcher.setAttribute("class", fetcherObject.getClass().getName()); - Element pluginIdElm = tikaConfigDoc.createElement("pluginId"); - pluginIdElm.setTextContent(fetcherObject.getPluginId()); - fetcher.appendChild(pluginIdElm); - populateFetcherConfigs(fetcherConfigParams, tikaConfigDoc, fetcher); - fetchersElement.appendChild(fetcher); - } - DOMSource source = new DOMSource(tikaConfigDoc); - FileWriter writer = new FileWriter(tikaConfigPath, StandardCharsets.UTF_8); - StreamResult result = new StreamResult(writer); - - TransformerFactory transformerFactory = TransformerFactory.newInstance(); - Transformer transformer = transformerFactory.newTransformer(); - transformer.transform(source, result); - } - - private void populateFetcherConfigs(Map fetcherConfigParams, - Document tikaConfigDoc, Element fetcher) { - for (var configParam : fetcherConfigParams.entrySet()) { - Element configElm = tikaConfigDoc.createElement(configParam.getKey()); - fetcher.appendChild(configElm); - if (configParam.getValue() instanceof List) { - List configParamVal = (List) configParam.getValue(); - String singularName = configParam.getKey().substring(0, configParam.getKey().length() - 1); - for (Object configParamObj : configParamVal) { - Element childElement = tikaConfigDoc.createElement(singularName); - childElement.setTextContent(Objects.toString(configParamObj)); - configElm.appendChild(childElement); - } - } else { - configElm.setTextContent(Objects.toString(configParam.getValue())); - } - } } +// +// +// private void updateTikaConfig() +// throws ParserConfigurationException, IOException, SAXException, TransformerException { +// Document tikaConfigDoc = +// DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(tikaConfigPath); +// +// Element fetchersElement = (Element) 
tikaConfigDoc.getElementsByTagName("fetchers").item(0); +// if (fetchersElement == null) { +// fetchersElement = tikaConfigDoc.createElement("fetchers"); +// tikaConfigDoc.getDocumentElement().appendChild(fetchersElement); +// } +// for (int i = 0; i < fetchersElement.getChildNodes().getLength(); ++i) { +// fetchersElement.removeChild(fetchersElement.getChildNodes().item(i)); +// } +// for (var fetcherConfigEntry : expiringFetcherStore.getFetcherConfigs().entrySet()) { +// Fetcher fetcherObject = getFetcher(fetcherConfigEntry.getValue().getFetcherPluginId()); +// Map fetcherConfigParams = OBJECT_MAPPER.convertValue( +// expiringFetcherStore.getFetcherConfigs().get(fetcherConfigEntry.getKey()), +// new TypeReference<>() { +// }); +// Element fetcher = tikaConfigDoc.createElement("fetcher"); +// fetcher.setAttribute("class", fetcherConfigEntry.getValue().getClass().getName()); +// +// Element fetcherIdElm = tikaConfigDoc.createElement("fetcherId"); +// fetcherIdElm.setTextContent(fetcherObject.getPluginId()); +// fetcher.appendChild(fetcherIdElm); +// +// populateFetcherConfigs(fetcherConfigParams, tikaConfigDoc, fetcher); +// fetchersElement.appendChild(fetcher); +// } +// DOMSource source = new DOMSource(tikaConfigDoc); +// FileWriter writer = new FileWriter(tikaConfigPath, StandardCharsets.UTF_8); +// StreamResult result = new StreamResult(writer); +// +// TransformerFactory transformerFactory = TransformerFactory.newInstance(); +// Transformer transformer = transformerFactory.newTransformer(); +// transformer.transform(source, result); +// } +// +// private void populateFetcherConfigs(Map fetcherConfigParams, +// Document tikaConfigDoc, Element fetcher) { +// for (var configParam : fetcherConfigParams.entrySet()) { +// Element configElm = tikaConfigDoc.createElement(configParam.getKey()); +// fetcher.appendChild(configElm); +// if (configParam.getValue() instanceof List) { +// List configParamVal = (List) configParam.getValue(); +// String singularName = 
configParam.getKey().substring(0, configParam.getKey().length() - 1); +// for (Object configParamObj : configParamVal) { +// Element childElement = tikaConfigDoc.createElement(singularName); +// childElement.setTextContent(Objects.toString(configParamObj)); +// configElm.appendChild(childElement); +// } +// } else { +// configElm.setTextContent(Objects.toString(configParam.getValue())); +// } +// } +// } @Override public void fetchAndParseServerSideStreaming(FetchAndParseRequest request, @@ -268,47 +254,47 @@ private void fetchAndParseImpl(FetchAndParseRequest request, @Override public void saveFetcher(SaveFetcherRequest request, StreamObserver responseObserver) { - SaveFetcherReply reply = - SaveFetcherReply.newBuilder().setFetcherId(request.getFetcherId()).build(); - try { - Map fetcherConfigMap = OBJECT_MAPPER.readValue(request.getFetcherConfigJson(), new TypeReference<>() {}); - Map tikaParamsMap = createTikaParamMap(fetcherConfigMap); - saveFetcher(request.getFetcherId(), request.getPluginId(), fetcherConfigMap, tikaParamsMap); - updateTikaConfig(); - } catch (Exception e) { - throw new TikaGrpcException(e); - } - responseObserver.onNext(reply); - responseObserver.onCompleted(); +// SaveFetcherReply reply = +// SaveFetcherReply.newBuilder().setFetcherId(request.getFetcherId()).build(); +// try { +// Map fetcherConfigMap = OBJECT_MAPPER.readValue(request.getFetcherConfigJson(), new TypeReference<>() {}); +// Map tikaParamsMap = createTikaParamMap(fetcherConfigMap); +// saveFetcher(request.getFetcherId(), request.getPluginId(), fetcherConfigMap, tikaParamsMap); +// updateTikaConfig(); +// } catch (Exception e) { +// throw new TikaGrpcException(e); +// } +// responseObserver.onNext(reply); +// responseObserver.onCompleted(); } private void saveFetcher(String fetcherId, String pluginId, Map paramsMap, Map tikaParamsMap) { - try { - if (paramsMap == null) { - paramsMap = new LinkedHashMap<>(); - } - Fetcher fetcher = getFetcher(pluginId); - Class fetcherClass = 
fetcher.getClass(); - String configClassName = - fetcherClass.getPackageName() + ".config." + fetcherClass.getSimpleName() + - "Config"; - - Class configClass = - (Class) Class.forName(configClassName, true, fetcher.getClass().getClassLoader()); - FetcherConfig configObject = OBJECT_MAPPER.convertValue(paramsMap, configClass); - if (Initializable.class.isAssignableFrom(fetcherClass)) { - Initializable initializable = (Initializable) fetcher; - initializable.initialize(tikaParamsMap); - } - if (expiringFetcherStore.deleteFetcher(fetcherId)) { - LOG.info("Updating fetcher {}", fetcherId); - } else { - LOG.info("Creating new fetcher {}", fetcherId); - } - expiringFetcherStore.createFetcher(fetcherId, configObject); - } catch (ClassNotFoundException | TikaConfigException e) { - throw new TikaGrpcException("Could not create fetcher", e); - } +// try { +// if (paramsMap == null) { +// paramsMap = new LinkedHashMap<>(); +// } +// Fetcher fetcher = getFetcher(pluginId); +// Class fetcherClass = fetcher.getClass(); +// String configClassName = +// fetcherClass.getPackageName() + ".config." 
+ fetcherClass.getSimpleName() + +// "Config"; +// +// Class configClass = +// (Class) Class.forName(configClassName, true, fetcher.getClass().getClassLoader()); +// FetcherConfig configObject = OBJECT_MAPPER.convertValue(paramsMap, configClass); +// if (Initializable.class.isAssignableFrom(fetcherClass)) { +// Initializable initializable = (Initializable) fetcher; +// initializable.initialize(tikaParamsMap); +// } +// if (expiringFetcherStore.deleteFetcher(fetcherId)) { +// LOG.info("Updating fetcher {}", fetcherId); +// } else { +// LOG.info("Creating new fetcher {}", fetcherId); +// } +// expiringFetcherStore.createFetcher(fetcherId, configObject); +// } catch (ClassNotFoundException | TikaConfigException e) { +// throw new TikaGrpcException("Could not create fetcher", e); +// } } private static Map createTikaParamMap(Map fetcherConfigMap) { @@ -386,11 +372,11 @@ public void deleteFetcher(DeleteFetcherRequest request, StreamObserver responseObserver) { boolean successfulDelete = deleteFetcher(request.getFetcherId()); if (successfulDelete) { - try { - updateTikaConfig(); - } catch (Exception e) { - throw new TikaGrpcException(e); - } +// try { +// updateTikaConfig(); +// } catch (Exception e) { +// throw new TikaGrpcException(e); +// } } responseObserver.onNext(DeleteFetcherReply.newBuilder().setSuccess(successfulDelete).build()); responseObserver.onCompleted(); @@ -398,33 +384,21 @@ public void deleteFetcher(DeleteFetcherRequest request, @Override public void getFetcherConfigJsonSchema(GetFetcherConfigJsonSchemaRequest request, StreamObserver responseObserver) { - GetFetcherConfigJsonSchemaReply.Builder builder = GetFetcherConfigJsonSchemaReply.newBuilder(); - try { - Fetcher fetcher = getFetcher(request.getPluginId()); - JsonSchema jsonSchema = JSON_SCHEMA_GENERATOR.generateSchema(fetcher.getClass()); - builder.setFetcherConfigJsonSchema(OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema)); - } catch (JsonProcessingException e) { - 
throw new TikaGrpcException("Could not create json schema for fetcher with plugin ID " + request.getPluginId(), e); - } - responseObserver.onNext(builder.build()); - responseObserver.onCompleted(); - } - - private Fetcher getFetcher(String pluginId) { - return pluginManager.getExtensions(Fetcher.class, pluginId) - .stream() - .findFirst() - .orElseThrow(() -> new TikaGrpcException("Could not find Fetcher extension for plugin " + pluginId)); +// GetFetcherConfigJsonSchemaReply.Builder builder = GetFetcherConfigJsonSchemaReply.newBuilder(); +// try { +// Fetcher fetcher = getFetcher(request.getPluginId()); +// JsonSchema jsonSchema = JSON_SCHEMA_GENERATOR.generateSchema(fetcher.getClass()); +// builder.setFetcherConfigJsonSchema(OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema)); +// } catch (JsonProcessingException e) { +// throw new TikaGrpcException("Could not create json schema for fetcher with plugin ID " + request.getPluginId(), e); +// } +// responseObserver.onNext(builder.build()); +// responseObserver.onCompleted(); } @Override public void listFetcherPlugins(ListFetcherPluginsRequest request, StreamObserver responseObserver) { - for (Fetcher fetcher : pluginManager.getExtensions(Fetcher.class)) { - responseObserver.onNext(ListFetcherPluginsReply.newBuilder() - .setFetcherPluginId(fetcher.getPluginId()) - .build()); - } - + // todo } private boolean deleteFetcher(String fetcherName) { diff --git a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java index ec9ace610f..6572a201a6 100644 --- a/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java +++ b/tika-grpc/src/test/java/org/apache/tika/pipes/grpc/TikaGrpcServerTest.java @@ -49,12 +49,10 @@ import io.grpc.stub.StreamObserver; import org.apache.commons.io.FileUtils; import org.jetbrains.annotations.NotNull; -import org.junit.jupiter.api.AfterAll; import 
org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,7 +68,6 @@ import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.fetcher.fs.FileSystemFetcher; import org.apache.tika.pipes.fetcher.fs.config.FileSystemFetcherConfig; -import org.apache.tika.pipes.grpc.plugin.GrpcPluginManager; @ExtendWith(GrpcCleanupExtension.class) public class TikaGrpcServerTest { @@ -89,21 +86,11 @@ static void init() throws Exception { } static final int NUM_FETCHERS_TO_CREATE = 10; - static PluginManager pluginManager; + static List pluginDirs; @BeforeAll - static void loadPluginManager() throws Exception { - System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. - Path fetchersPath = Path.of("..", "tika-pipes", "tika-fetchers"); - LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); - pluginManager = new GrpcPluginManager(fetchersPath); - pluginManager.loadPlugins(); - pluginManager.startPlugins(); - } - - @AfterAll - static void killPluginManager() { - pluginManager.stopPlugins(); + static void loadPluginManager() { + pluginDirs = Collections.singletonList(Path.of("..", "tika-pipes", "tika-fetchers")); } @Test @@ -114,7 +101,7 @@ public void testFetcherCrud(Resources resources) throws Exception { Server server = InProcessServerBuilder .forName(serverName) .directExecutor() - .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), pluginManager)) + .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), pluginDirs)) .build() .start(); resources.register(server, Duration.ofSeconds(10)); @@ -209,7 +196,7 @@ public void testBiStream(Resources resources) throws Exception { Server server = InProcessServerBuilder .forName(serverName) .directExecutor() 
- .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), pluginManager)) + .addService(new TikaGrpcServerImpl(tikaConfigXml.getAbsolutePath(), pluginDirs)) .build() .start(); resources.register(server, Duration.ofSeconds(10)); diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java index b32304d69b..1019a1d99d 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java @@ -38,8 +38,12 @@ import com.amazonaws.services.s3.iterable.S3Objects; import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectSummary; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.pf4j.PluginManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; @@ -51,12 +55,26 @@ import org.apache.tika.pipes.fetcher.FetcherManager; import org.apache.tika.pipes.pipesiterator.CallablePipesIterator; import org.apache.tika.pipes.pipesiterator.PipesIterator; +import org.apache.tika.pipes.plugin.TikaPluginManager; @Disabled("turn these into actual tests with mock s3") public class PipeIntegrationTests { + private static final Logger LOG = LoggerFactory.getLogger(PipeIntegrationTests.class); private static final Path OUTDIR = Paths.get(""); + PluginManager pluginManager; + + @BeforeEach + void init() throws IOException { + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. 
+ Path fetchersPath = Path.of("..", ".."); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + } + @Test public void testBruteForce() throws Exception { String region = ""; @@ -146,7 +164,7 @@ public void testS3ToS3() throws Exception { } private Fetcher getFetcher(String fileName, String fetcherName) throws Exception { - FetcherManager manager = FetcherManager.load(getPath(fileName)); + FetcherManager manager = FetcherManager.load(pluginManager); return manager.getFetcher(fetcherName); } diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml index 0f16786352..b45912e824 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/pom.xml @@ -52,7 +52,7 @@ ${project.build.directory}/lib compile - tika-core + tika-core,tika-pipes-core diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/java/org/apache/tika/pipes/fetcher/azblob/TestAZBlobFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/java/org/apache/tika/pipes/fetcher/azblob/TestAZBlobFetcher.java index 1ba2cfdcd1..4527b8b0ef 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/java/org/apache/tika/pipes/fetcher/azblob/TestAZBlobFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/java/org/apache/tika/pipes/fetcher/azblob/TestAZBlobFetcher.java @@ -21,30 +21,39 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; -import java.nio.file.Paths; +import java.nio.file.Path; import java.util.List; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.pf4j.PluginManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.tika.TikaTest; import 
org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; +import org.apache.tika.pipes.plugin.TikaPluginManager; import org.apache.tika.serialization.JsonMetadataList; @Disabled("write actual unit tests") public class TestAZBlobFetcher extends TikaTest { + private static final Logger LOG = LoggerFactory.getLogger(TestAZBlobFetcher.class); private static final String FETCH_STRING = "something-or-other/test-out.json"; @Test public void testConfig() throws Exception { - FetcherManager fetcherManager = FetcherManager.load(Paths.get(this - .getClass() - .getResource("/tika-config-az-blob.xml") - .toURI())); + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. + Path fetchersPath = Path.of("..", ".."); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + FetcherManager fetcherManager = FetcherManager.load(pluginManager); + Fetcher fetcher = fetcherManager.getFetcher("az-blob"); List metadataList = null; try (Reader reader = new BufferedReader(new InputStreamReader(fetcher.fetch(FETCH_STRING, new Metadata(), new ParseContext()), StandardCharsets.UTF_8))) { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-file-system/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-file-system/pom.xml index 935b0a0065..cf40e05587 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-file-system/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-file-system/pom.xml @@ -49,7 +49,7 @@ ${project.build.directory}/lib compile - tika-core + tika-core,tika-pipes-core diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml index ab40f98b25..dd2a831c97 100644 --- 
a/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml @@ -52,7 +52,7 @@ ${project.build.directory}/lib compile - tika-core + tika-core,tika-pipes-core diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java index e685520507..87b087df7d 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/java/org/apache/tika/pipes/fetcher/s3/TestGCSFetcher.java @@ -21,21 +21,25 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.pf4j.PluginManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; +import org.apache.tika.pipes.plugin.TikaPluginManager; @Disabled("write actual unit tests") public class TestGCSFetcher { + private static final Logger LOG = LoggerFactory.getLogger(TestGCSFetcher.class); private static final String FETCH_STRING = "testExtraSpaces.pdf"; @@ -48,11 +52,15 @@ public static void setUp() throws Exception { outputFile = Files.createTempFile(TEMP_DIR, "tika-test", ".pdf"); } - @Test public void testConfig() throws Exception { - FetcherManager fetcherManager = FetcherManager.load( - Paths.get(this.getClass().getResource("/tika-config-gcs.xml").toURI())); + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir 
easier. + Path fetchersPath = Path.of("..", ".."); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + FetcherManager fetcherManager = FetcherManager.load(pluginManager); Fetcher fetcher = fetcherManager.getFetcher("gcs"); Metadata metadata = new Metadata(); try (InputStream is = fetcher.fetch(FETCH_STRING, metadata, new ParseContext())) { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml index 268aee9d36..d125595a3b 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml @@ -69,7 +69,7 @@ ${project.build.directory}/lib compile - tika-core + tika-core,tika-pipes-core diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java index c888db8ae1..159896546f 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java @@ -28,7 +28,6 @@ import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.security.SecureRandom; import java.util.ArrayList; @@ -60,6 +59,9 @@ import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; +import org.pf4j.PluginManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.tika.TikaTest; import org.apache.tika.client.HttpClientFactory; @@ -74,8 +76,10 @@ import 
org.apache.tika.pipes.fetcher.http.config.HttpFetcherConfig; import org.apache.tika.pipes.fetcher.http.config.HttpHeaders; import org.apache.tika.pipes.fetcher.http.jwt.JwtGenerator; +import org.apache.tika.pipes.plugin.TikaPluginManager; class HttpFetcherTest extends TikaTest { + private static final Logger LOG = LoggerFactory.getLogger(HttpFetcherTest.class); private static final String TEST_URL = "wontbecalled"; private static final String CONTENT = "request content"; @@ -266,9 +270,13 @@ public void testRange() throws Exception { } FetcherManager getFetcherManager(String path) throws Exception { - return FetcherManager.load(Paths.get(HttpFetcherTest.class - .getResource("/" + path) - .toURI())); + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. + Path fetchersPath = Path.of("..", ".."); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + return FetcherManager.load(pluginManager); } private void mockClientResponse(final HttpResponse response) throws Exception { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml index 714813c917..d3c6eb6d22 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml @@ -136,7 +136,7 @@ ${project.build.directory}/lib compile - tika-core + tika-core,tika-pipes-core diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml index ceae4f06f6..583e3394a3 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml @@ -56,7 +56,7 @@ ${project.build.directory}/lib compile - tika-core + tika-core,tika-pipes-core diff 
--git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/java/org/apache/tika/pipes/fetcher/s3/TestS3Fetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/java/org/apache/tika/pipes/fetcher/s3/TestS3Fetcher.java index 0055bf68a8..d8e4922194 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/java/org/apache/tika/pipes/fetcher/s3/TestS3Fetcher.java +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/java/org/apache/tika/pipes/fetcher/s3/TestS3Fetcher.java @@ -25,14 +25,20 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.pf4j.PluginManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; +import org.apache.tika.pipes.plugin.TikaPluginManager; @Disabled("write actual unit tests") public class TestS3Fetcher { + private static final Logger LOG = LoggerFactory.getLogger(TestS3Fetcher.class); + private static final String FETCH_STRING = ""; private final Path outputFile = Paths.get(""); private final String region = "us-east-1"; @@ -53,8 +59,14 @@ public void testBasic() throws Exception { @Test public void testConfig() throws Exception { - FetcherManager fetcherManager = FetcherManager.load( - Paths.get(this.getClass().getResource("/tika-config-s3.xml").toURI())); + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. 
+ Path fetchersPath = Path.of("..", ".."); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + FetcherManager fetcherManager = FetcherManager.load(pluginManager); + Fetcher fetcher = fetcherManager.getFetcher("s3"); Metadata metadata = new Metadata(); try (InputStream is = fetcher.fetch(FETCH_STRING, metadata, new ParseContext())) { diff --git a/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml index 343d59e088..a446c9e9b4 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-url/pom.xml @@ -46,7 +46,7 @@ ${project.build.directory}/lib compile - tika-core + tika-core,tika-pipes-core diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java index f8dcffb641..5d30d97c3c 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/CompositePipesReporter.java @@ -27,6 +27,7 @@ import org.apache.tika.config.Param; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.pipesiterator.TotalCountResult; +import org.apache.tika.pipes.reporter.PipesReporter; public class CompositePipesReporter extends PipesReporter implements Initializable { diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesClient.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesClient.java index 0e1ca18e05..bb2a2e4ccc 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesClient.java +++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesClient.java @@ -29,6 +29,7 @@ import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -561,6 +562,9 @@ private String[] getCommandline() { commandLine.add(Long.toString(pipesConfig.getMaxForEmitBatchBytes())); commandLine.add(Long.toString(pipesConfig.getTimeoutMillis())); commandLine.add(Long.toString(pipesConfig.getShutdownClientAfterMillis())); + for (Path pluginDir : pipesConfig.getPluginDirs()) { + commandLine.add(ProcessUtils.escapeCommandLine(pluginDir.toAbsolutePath().toString())); + } LOG.debug("pipesClientId={}: commandline: {}", pipesClientId, commandLine); return commandLine.toArray(new String[0]); } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfig.java index 132e657a74..f8a21124f0 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfig.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfig.java @@ -20,6 +20,8 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; import java.util.Set; import org.slf4j.Logger; @@ -34,6 +36,10 @@ public class PipesConfig extends PipesConfigBase { private long maxWaitForClientMillis = 60000; public static PipesConfig load(Path tikaConfig) throws IOException, TikaConfigException { + return load(tikaConfig, new ArrayList<>()); + } + + public static PipesConfig load(Path tikaConfig, List pluginDirs) throws IOException, TikaConfigException { PipesConfig pipesConfig = new PipesConfig(); try (InputStream is = Files.newInputStream(tikaConfig)) { Set settings = pipesConfig.configure("pipes", is); diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java index 83ad11e9ed..ad97740946 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesConfigBase.java @@ -60,6 +60,7 @@ public class PipesConfigBase extends ConfigBase { private int staleFetcherDelaySeconds = DEFAULT_STALE_FETCHER_DELAY_SECONDS; private List forkedJvmArgs = new ArrayList<>(); private Path tikaConfig; + private List pluginDirs; private String javaPath = "java"; public long getTimeoutMillis() { @@ -189,4 +190,12 @@ public int getStaleFetcherDelaySeconds() { public void setStaleFetcherDelaySeconds(int staleFetcherDelaySeconds) { this.staleFetcherDelaySeconds = staleFetcherDelaySeconds; } + + public List getPluginDirs() { + return pluginDirs; + } + + public void setPluginDirs(List pluginDirs) { + this.pluginDirs = pluginDirs; + } } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesServer.java index 1c0c204c78..8fd82af364 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesServer.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesServer.java @@ -28,6 +28,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Optional; @@ -35,6 +36,7 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; +import org.pf4j.PluginManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.ContentHandler; @@ -74,6 +76,7 @@ import 
org.apache.tika.pipes.extractor.EmittingEmbeddedDocumentBytesHandler; import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; +import org.apache.tika.pipes.plugin.TikaPluginManager; import org.apache.tika.sax.BasicContentHandlerFactory; import org.apache.tika.sax.ContentHandlerFactory; import org.apache.tika.sax.RecursiveParserWrapperHandler; @@ -89,7 +92,6 @@ * the PipesClient. */ public class PipesServer implements Runnable { - private static final Logger LOG = LoggerFactory.getLogger(PipesServer.class); //this has to be some number not close to 0-3 @@ -143,10 +145,12 @@ public static STATUS lookup(int val) { private volatile boolean parsing; private volatile long since; + private PluginManager pluginManager; + private List pluginDirs; public PipesServer(Path tikaConfigPath, InputStream in, PrintStream out, long maxForEmitBatchBytes, long serverParseTimeoutMillis, - long serverWaitTimeoutMillis) + long serverWaitTimeoutMillis, List pluginDirs) throws IOException, TikaException, SAXException { this.tikaConfigPath = tikaConfigPath; this.input = new DataInputStream(in); @@ -156,19 +160,23 @@ public PipesServer(Path tikaConfigPath, InputStream in, PrintStream out, this.serverWaitTimeoutMillis = serverWaitTimeoutMillis; this.parsing = false; this.since = System.currentTimeMillis(); + this.pluginDirs = pluginDirs; } - public static void main(String[] args) throws Exception { try { Path tikaConfig = Paths.get(args[0]); long maxForEmitBatchBytes = Long.parseLong(args[1]); long serverParseTimeoutMillis = Long.parseLong(args[2]); long serverWaitTimeoutMillis = Long.parseLong(args[3]); + List pluginPaths = new ArrayList<>(); + for (int i = 4; i < args.length; ++i) { + pluginPaths.add(Paths.get(args[i])); + } PipesServer server = new PipesServer(tikaConfig, System.in, System.out, maxForEmitBatchBytes, - serverParseTimeoutMillis, serverWaitTimeoutMillis); + serverParseTimeoutMillis, serverWaitTimeoutMillis, pluginPaths); 
System.setIn(UnsynchronizedByteArrayInputStream.builder().setByteArray(new byte[0]).get()); System.setOut(System.err); Thread watchdog = new Thread(server, "Tika Watchdog"); @@ -455,10 +463,6 @@ private Fetcher getFetcher(FetchEmitTuple t) { LOG.warn(noFetcherMsg); write(STATUS.FETCHER_NOT_FOUND, noFetcherMsg); return null; - } catch (IOException | TikaException e) { - LOG.warn("Couldn't initialize fetcher for fetch id '" + t.getId() + "'", e); - write(STATUS.FETCHER_INITIALIZATION_EXCEPTION, ExceptionUtils.getStackTrace(e)); - return null; } } @@ -743,8 +747,11 @@ private FetchEmitTuple readFetchEmitTuple() { protected void initializeResources() throws TikaException, IOException, SAXException { //TODO allowed named configurations in tika config + pluginManager = pluginDirs == null || pluginDirs.isEmpty() ? new TikaPluginManager() : new TikaPluginManager(pluginDirs); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); this.tikaConfig = new TikaConfig(tikaConfigPath); - this.fetcherManager = FetcherManager.load(tikaConfigPath); + this.fetcherManager = FetcherManager.load(pluginManager); //skip initialization of the emitters if emitting //from the pipesserver is turned off. 
if (maxForEmitBatchBytes > -1) { diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java index bc55cca5db..b13ba64d96 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncConfig.java @@ -23,7 +23,7 @@ import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.PipesConfigBase; -import org.apache.tika.pipes.PipesReporter; +import org.apache.tika.pipes.reporter.PipesReporter; public class AsyncConfig extends PipesConfigBase { diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java index 3a6751f4ff..e0939f6a2b 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java @@ -37,13 +37,13 @@ import org.apache.tika.pipes.FetchEmitTuple; import org.apache.tika.pipes.PipesClient; import org.apache.tika.pipes.PipesException; -import org.apache.tika.pipes.PipesReporter; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.emitter.EmitData; import org.apache.tika.pipes.emitter.EmitterManager; import org.apache.tika.pipes.pipesiterator.PipesIterator; import org.apache.tika.pipes.pipesiterator.TotalCountResult; import org.apache.tika.pipes.pipesiterator.TotalCounter; +import org.apache.tika.pipes.reporter.PipesReporter; /** * This is the main class for handling async requests. 
This manages diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java index c748541afb..c8b98de177 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java @@ -19,10 +19,12 @@ import java.io.IOException; import java.util.List; +import org.pf4j.ExtensionPoint; + import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; -public interface Emitter { +public interface Emitter extends ExtensionPoint { String getName(); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/exception/PipesRuntimeException.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/exception/PipesRuntimeException.java new file mode 100644 index 0000000000..45f4982b86 --- /dev/null +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/exception/PipesRuntimeException.java @@ -0,0 +1,22 @@ +package org.apache.tika.pipes.exception; + +public class PipesRuntimeException extends RuntimeException { + public PipesRuntimeException() { + } + + public PipesRuntimeException(String message) { + super(message); + } + + public PipesRuntimeException(String message, Throwable cause) { + super(message, cause); + } + + public PipesRuntimeException(Throwable cause) { + super(cause); + } + + public PipesRuntimeException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java index 007b052f18..d0a532b499 100644 --- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java @@ -16,18 +16,15 @@ */ package org.apache.tika.pipes.fetcher; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import org.pf4j.PluginManager; import org.apache.tika.config.ConfigBase; import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.exception.TikaException; +import org.apache.tika.pipes.exception.PipesRuntimeException; +import org.apache.tika.pipes.plugin.TikaPluginManager; /** * Utility class to hold multiple fetchers. @@ -35,42 +32,32 @@ * This forbids multiple fetchers supporting the same name. */ public class FetcherManager extends ConfigBase { + private final PluginManager pluginManager; + + public FetcherManager() throws TikaConfigException { + pluginManager = new TikaPluginManager(); + } - public static FetcherManager load(Path p) throws IOException, TikaConfigException { - try (InputStream is = - Files.newInputStream(p)) { - return FetcherManager.buildComposite("fetchers", FetcherManager.class, - "fetcher", Fetcher.class, is); - } + public FetcherManager(PluginManager pluginManager) { + this.pluginManager = pluginManager; } - private final Map fetcherMap = new ConcurrentHashMap<>(); - public FetcherManager(List fetchers) throws TikaConfigException { - for (Fetcher fetcher : fetchers) { - String name = fetcher.getPluginId(); - if (name == null || name.trim().length() == 0) { - throw new TikaConfigException("fetcher name must not be blank"); - } - if (fetcherMap.containsKey(fetcher.getPluginId())) { - throw new TikaConfigException( - "Multiple fetchers cannot support the same prefix: " + fetcher.getPluginId()); - } - 
fetcherMap.put(fetcher.getPluginId(), fetcher); - } + public static FetcherManager load(PluginManager pluginManager) { + return new FetcherManager(pluginManager); } - public Fetcher getFetcher(String fetcherName) throws IOException, TikaException { - Fetcher fetcher = fetcherMap.get(fetcherName); - if (fetcher == null) { - throw new IllegalArgumentException( - "Can't find fetcher for fetcherName: " + fetcherName + ". I've loaded: " + - fetcherMap.keySet()); - } - return fetcher; + public Fetcher getFetcher(String pluginId) { + return pluginManager.getExtensions(Fetcher.class, pluginId) + .stream() + .findFirst() + .orElseThrow(() -> new PipesRuntimeException("Could not find Fetcher extension for plugin " + pluginId)); } public Set getSupported() { - return fetcherMap.keySet(); + return pluginManager.getExtensions(Fetcher.class) + .stream() + .map(Fetcher::getPluginId) + .collect(Collectors.toSet()); } /** @@ -80,17 +67,9 @@ public Set getSupported() { * @return */ public Fetcher getFetcher() { - if (fetcherMap.size() == 0) { - throw new IllegalArgumentException("fetchers size must == 1 for the no arg call"); - } - if (fetcherMap.size() > 1) { - throw new IllegalArgumentException("need to specify 'fetcherName' if > 1 fetchers are" + - " available"); - } - for (Fetcher fetcher : fetcherMap.values()) { - return fetcher; - } - //this should be unreachable?! 
- throw new IllegalArgumentException("fetchers size must == 0"); + return pluginManager.getExtensions(Fetcher.class) + .stream() + .findFirst() + .orElseThrow(() -> new PipesRuntimeException("Could not find any instances of the Fetcher extension")); } } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java index c029f3d6f1..400b644005 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/fetcher/config/FetcherConfig.java @@ -21,6 +21,10 @@ public abstract class FetcherConfig { abstract public String getFetcherPluginId(); + public void setFetcherPluginId(String fetcherPluginId) { + // no op - we put this here to appease the ConfigBase + } + public String getFetcherId() { return fetcherId; } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java index 34706f7e88..e142ffc2b0 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIterator.java @@ -41,6 +41,7 @@ import org.apache.tika.exception.TikaTimeoutException; import org.apache.tika.pipes.FetchEmitTuple; import org.apache.tika.pipes.HandlerConfig; +import org.apache.tika.pipes.pipesiterator.fs.IPipesIterator; import org.apache.tika.sax.BasicContentHandlerFactory; /** @@ -51,7 +52,7 @@ * next() is called after hasNext() has returned false. 
*/ public abstract class PipesIterator extends ConfigBase - implements Callable, Iterable, Initializable { + implements IPipesIterator, Callable, Iterable, Initializable { public static final long DEFAULT_MAX_WAIT_MS = 300_000; public static final int DEFAULT_QUEUE_SIZE = 1000; @@ -177,9 +178,7 @@ protected HandlerConfig getHandlerConfig() { return new HandlerConfig(handlerType, parseMode, writeLimit, maxEmbeddedResources, throwOnWriteLimitReached); } - - protected abstract void enqueue() throws IOException, TimeoutException, InterruptedException; - + protected void tryToAdd(FetchEmitTuple p) throws InterruptedException, TimeoutException { added++; boolean offered = queue.offer(p, maxWaitMs, TimeUnit.MILLISECONDS); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java index 75cb8390cc..c28db99890 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/filelist/FileListPipesIterator.java @@ -59,7 +59,7 @@ public class FileListPipesIterator extends PipesIterator implements Initializabl private Path fileListPath; @Override - protected void enqueue() throws IOException, TimeoutException, InterruptedException { + public void enqueue() throws IOException, TimeoutException, InterruptedException { try (BufferedReader reader = Files.newBufferedReader(fileListPath, StandardCharsets.UTF_8)) { if (hasHeader) { reader.readLine(); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java index 967df73b99..34a61b428a 100644 --- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/FileSystemPipesIterator.java @@ -72,7 +72,7 @@ public void setBasePath(String basePath) { } @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { if (!Files.isDirectory(basePath)) { throw new IllegalArgumentException( "\"basePath\" directory does not exist: " + basePath.toAbsolutePath()); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/IPipesIterator.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/IPipesIterator.java new file mode 100644 index 0000000000..acd34f23c4 --- /dev/null +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/pipesiterator/fs/IPipesIterator.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.pipesiterator.fs; + +import java.io.IOException; +import java.util.concurrent.TimeoutException; + +import org.pf4j.ExtensionPoint; + +public interface IPipesIterator extends ExtensionPoint { + void enqueue() throws IOException, TimeoutException, InterruptedException; +} diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/plugin/ClasspathPluginPropertiesFinder.java similarity index 97% rename from tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/plugin/ClasspathPluginPropertiesFinder.java index 2e9552d36c..2d1a0a3fd6 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/ClasspathPluginPropertiesFinder.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/plugin/ClasspathPluginPropertiesFinder.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.tika.pipes.grpc.plugin; +package org.apache.tika.pipes.plugin; import java.nio.file.Path; import java.nio.file.Paths; diff --git a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/plugin/TikaPluginManager.java similarity index 84% rename from tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/plugin/TikaPluginManager.java index 0e35219772..b71bae1dd0 100644 --- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/plugin/GrpcPluginManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/plugin/TikaPluginManager.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.tika.pipes.grpc.plugin; +package org.apache.tika.pipes.plugin; import java.nio.file.Path; import java.util.List; @@ -26,19 +26,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.tika.pipes.exception.PipesRuntimeException; import org.apache.tika.pipes.fetcher.Fetcher; -import org.apache.tika.pipes.grpc.exception.TikaGrpcException; -public class GrpcPluginManager extends DefaultPluginManager { - private static final Logger LOGGER = LoggerFactory.getLogger(GrpcPluginManager.class); - public GrpcPluginManager() { +public class TikaPluginManager extends DefaultPluginManager { + private static final Logger LOGGER = LoggerFactory.getLogger(TikaPluginManager.class); + public TikaPluginManager() { } - public GrpcPluginManager(Path... pluginsRoots) { + public TikaPluginManager(Path... 
pluginsRoots) { super(pluginsRoots); } - public GrpcPluginManager(List pluginsRoots) { + public TikaPluginManager(List pluginsRoots) { super(pluginsRoots); } @@ -70,7 +70,7 @@ public void startPlugins() { private void checkFetcherExtensions(PluginWrapper plugin) { for (Class extensionClass : getExtensionClasses(Fetcher.class, plugin.getPluginId())) { if (!Fetcher.class.isAssignableFrom(extensionClass)) { - throw new TikaGrpcException("Something is wrong with the classpath. " + Fetcher.class.getName() + + throw new PipesRuntimeException("Something is wrong with the classpath. " + Fetcher.class.getName() + " should be assignable from " + extensionClass.getName() + ". Did tika-core accidentally get in your plugin lib?"); } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/PipesReporter.java similarity index 95% rename from tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/PipesReporter.java index 3978039b40..1a9c165d03 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporter.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/PipesReporter.java @@ -14,11 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.tika.pipes; +package org.apache.tika.pipes.reporter; import java.io.Closeable; import java.io.IOException; +import org.apache.tika.pipes.FetchEmitTuple; +import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.pipesiterator.TotalCountResult; /** diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporterBase.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/PipesReporterBase.java similarity index 98% rename from tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporterBase.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/PipesReporterBase.java index 3dcddfa71e..6cdf89767f 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/PipesReporterBase.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/PipesReporterBase.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.tika.pipes; +package org.apache.tika.pipes.reporter; import java.util.HashSet; import java.util.List; @@ -26,6 +26,7 @@ import org.apache.tika.config.InitializableProblemHandler; import org.apache.tika.config.Param; import org.apache.tika.exception.TikaConfigException; +import org.apache.tika.pipes.PipesResult; /** * Base class that includes filtering by {@link PipesResult.STATUS} diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/logging/LoggingPipesReporter.java similarity index 88% rename from tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java rename to tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/logging/LoggingPipesReporter.java index 5f00880ba0..fe61d86ddc 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/LoggingPipesReporter.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/reporter/logging/LoggingPipesReporter.java @@ -14,12 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.tika.pipes; +package org.apache.tika.pipes.reporter.logging; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.tika.pipes.FetchEmitTuple; +import org.apache.tika.pipes.PipesResult; +import org.apache.tika.pipes.reporter.PipesReporter; + /** * Simple PipesReporter that logs everything at the debug level. 
*/ diff --git a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java index 579f298d64..6fd2d39c89 100644 --- a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java +++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java @@ -29,6 +29,9 @@ import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.pf4j.PluginManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.tika.TikaTest; import org.apache.tika.extractor.BasicEmbeddedDocumentBytesHandler; @@ -37,10 +40,11 @@ import org.apache.tika.parser.ParseContext; import org.apache.tika.pipes.emitter.EmitKey; import org.apache.tika.pipes.fetcher.FetchKey; -import org.apache.tika.pipes.fetcher.Fetcher; import org.apache.tika.pipes.fetcher.FetcherManager; +import org.apache.tika.pipes.plugin.TikaPluginManager; public class PipesServerTest extends TikaTest { + private static final Logger LOG = LoggerFactory.getLogger(PipesServerTest.class); /** * This test is useful for stepping through the debugger on PipesServer @@ -64,16 +68,24 @@ public void testBasic(@TempDir Path tmp) throws Exception { UnsynchronizedByteArrayInputStream.builder().setByteArray(new byte[0]).get(), new PrintStream(UnsynchronizedByteArrayOutputStream.builder().get(), true, StandardCharsets.UTF_8.name()), - -1, 30000, 30000); + -1, 30000, 30000, null); pipesServer.initializeResources(); FetchEmitTuple fetchEmitTuple = new FetchEmitTuple("id", new FetchKey("fs", "mock.xml"), new EmitKey("", "")); - Fetcher fetcher = FetcherManager.load(tikaConfig).getFetcher(); + + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. 
+ Path fetchersPath = Path.of("..", "tika-fetchers"); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + FetcherManager fetcherManager = FetcherManager.load(pluginManager); + PipesServer.MetadataListAndEmbeddedBytes - parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcher); + parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcherManager.getFetcher("file-system-fetcher")); assertEquals("5f3b924303e960ce35d7f705e91d3018dd110a9c3cef0546a91fe013d6dad6fd", parseData.metadataList.get(0).get("X-TIKA:digest:SHA-256")); } @@ -99,7 +111,7 @@ public void testEmbeddedStreamEmitter(@TempDir Path tmp) throws Exception { UnsynchronizedByteArrayInputStream.builder().setByteArray(new byte[0]).get(), new PrintStream(UnsynchronizedByteArrayOutputStream.builder().get(), true, StandardCharsets.UTF_8.name()), - -1, 30000, 30000); + -1, 30000, 30000, null); pipesServer.initializeResources(); EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = @@ -111,9 +123,16 @@ public void testEmbeddedStreamEmitter(@TempDir Path tmp) throws Exception { FetchEmitTuple fetchEmitTuple = new FetchEmitTuple("id", new FetchKey("fs", "mock.xml"), new EmitKey("", ""), new Metadata(), parseContext); - Fetcher fetcher = FetcherManager.load(tikaConfig).getFetcher(); + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. 
+ Path fetchersPath = Path.of("..", ".."); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + FetcherManager fetcherManager = FetcherManager.load(pluginManager); + PipesServer.MetadataListAndEmbeddedBytes - parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcher); + parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcherManager.getFetcher()); assertEquals(2, parseData.metadataList.size()); byte[] bytes0 = @@ -155,7 +174,7 @@ public void testEmbeddedStreamEmitterLimitBytes(@TempDir Path tmp) throws Except UnsynchronizedByteArrayInputStream.builder().setByteArray(new byte[0]).get(), new PrintStream(UnsynchronizedByteArrayOutputStream.builder().get(), true, StandardCharsets.UTF_8.name()), - -1, 30000, 30000); + -1, 30000, 30000, null); pipesServer.initializeResources(); EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = @@ -168,9 +187,16 @@ public void testEmbeddedStreamEmitterLimitBytes(@TempDir Path tmp) throws Except new FetchKey("fs", "mock.xml"), new EmitKey("", ""), new Metadata(), parseContext); - Fetcher fetcher = FetcherManager.load(tikaConfig).getFetcher(); + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. 
+ Path fetchersPath = Path.of("..", ".."); + LOG.info("Using pf4j in development mode using plugins dir: {}", fetchersPath.toFile().getCanonicalPath()); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + FetcherManager fetcherManager = FetcherManager.load(pluginManager); + PipesServer.MetadataListAndEmbeddedBytes - parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcher); + parseData = pipesServer.parseFromTuple(fetchEmitTuple, fetcherManager.getFetcher()); assertEquals(2, parseData.metadataList.size()); byte[] bytes0 = diff --git a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java index 6e8308c895..2a59859785 100644 --- a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java +++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporter.java @@ -20,8 +20,8 @@ import org.apache.tika.config.Field; import org.apache.tika.pipes.FetchEmitTuple; -import org.apache.tika.pipes.PipesReporter; import org.apache.tika.pipes.PipesResult; +import org.apache.tika.pipes.reporter.PipesReporter; public class MockReporter extends PipesReporter { diff --git a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java index 9bfcd55918..99dd94edb9 100644 --- a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java +++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java @@ -26,7 +26,7 @@ import org.junit.jupiter.api.Test; import org.apache.tika.pipes.CompositePipesReporter; -import org.apache.tika.pipes.PipesReporter; +import org.apache.tika.pipes.reporter.PipesReporter; public class MockReporterTest { diff --git 
a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/config/TikaPipesConfigTest.java b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/config/TikaPipesConfigTest.java index 3f1a2e584c..392dbc611f 100644 --- a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/config/TikaPipesConfigTest.java +++ b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/config/TikaPipesConfigTest.java @@ -29,41 +29,15 @@ import org.apache.tika.config.AbstractTikaConfigTest; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.CompositePipesReporter; -import org.apache.tika.pipes.PipesReporter; import org.apache.tika.pipes.async.AsyncConfig; import org.apache.tika.pipes.async.MockReporter; import org.apache.tika.pipes.emitter.Emitter; import org.apache.tika.pipes.emitter.EmitterManager; -import org.apache.tika.pipes.fetcher.FetcherManager; import org.apache.tika.pipes.pipesiterator.PipesIterator; +import org.apache.tika.pipes.reporter.PipesReporter; public class TikaPipesConfigTest extends AbstractTikaConfigTest { //this handles tests for the newer pipes type configs. - @Test - public void testDuplicateFetchers() throws Exception { - //can't have two fetchers with the same name - assertThrows(TikaConfigException.class, () -> { - FetcherManager.load(getConfigFilePath("fetchers-duplicate-config.xml")); - }); - } - - @Test - public void testNoNameFetchers() throws Exception { - //can't have two fetchers with an empty name - assertThrows(TikaConfigException.class, () -> { - FetcherManager.load(getConfigFilePath("fetchers-noname-config.xml")); - }); - } - - @Test - public void testNoBasePathFetchers() throws Exception { - //no basepath is allowed as of > 2.3.0 - //test that this does not throw an exception. 
- - FetcherManager fetcherManager = FetcherManager.load( - getConfigFilePath("fetchers-nobasepath-config.xml")); - } - @Test public void testEmitters() throws Exception { EmitterManager emitterManager = diff --git a/tika-pipes/tika-pipes-iterators/pom.xml b/tika-pipes/tika-pipes-iterators/pom.xml index 5e85241b5c..44f46a27b9 100644 --- a/tika-pipes/tika-pipes-iterators/pom.xml +++ b/tika-pipes/tika-pipes-iterators/pom.xml @@ -50,6 +50,12 @@ tika-pipes-core ${project.version} + + org.pf4j + pf4j + + provided + diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-az-blob/src/main/java/org/apache/tika/pipes/pipesiterator/azblob/AZBlobPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-az-blob/src/main/java/org/apache/tika/pipes/pipesiterator/azblob/AZBlobPipesIterator.java index 0c5d6840dc..f0bb0373fe 100644 --- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-az-blob/src/main/java/org/apache/tika/pipes/pipesiterator/azblob/AZBlobPipesIterator.java +++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-az-blob/src/main/java/org/apache/tika/pipes/pipesiterator/azblob/AZBlobPipesIterator.java @@ -86,7 +86,7 @@ public void setPrefix(String prefix) { } @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { String fetcherName = getFetcherName(); String emitterName = getEmitterName(); long start = System.currentTimeMillis(); diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-csv/src/main/java/org/apache/tika/pipes/pipesiterator/csv/CSVPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-csv/src/main/java/org/apache/tika/pipes/pipesiterator/csv/CSVPipesIterator.java index e9c0065700..77cd4da3e8 100644 --- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-csv/src/main/java/org/apache/tika/pipes/pipesiterator/csv/CSVPipesIterator.java +++ 
b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-csv/src/main/java/org/apache/tika/pipes/pipesiterator/csv/CSVPipesIterator.java @@ -112,7 +112,7 @@ public void setCsvPath(Path csvPath) { } @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { String fetcherName = getFetcherName(); String emitterName = getEmitterName(); try (Reader reader = Files.newBufferedReader(csvPath, charset)) { diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java index 248d2461e1..4a15a44b25 100644 --- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java +++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-gcs/src/main/java/org/apache/tika/pipes/pipesiterator/gcs/GCSPipesIterator.java @@ -91,7 +91,7 @@ public void checkInitialization(InitializableProblemHandler problemHandler) thro } @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { String fetcherName = getFetcherName(); String emitterName = getEmitterName(); long start = System.currentTimeMillis(); diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/main/java/org/apache/tika/pipes/pipesiterator/jdbc/JDBCPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/main/java/org/apache/tika/pipes/pipesiterator/jdbc/JDBCPipesIterator.java index 2c178e1475..cdc647b89e 100644 --- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/main/java/org/apache/tika/pipes/pipesiterator/jdbc/JDBCPipesIterator.java +++ 
b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/main/java/org/apache/tika/pipes/pipesiterator/jdbc/JDBCPipesIterator.java @@ -139,7 +139,7 @@ public void setQueryTimeoutSeconds(int seconds) { } @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { String fetcherName = getFetcherName(); String emitterName = getEmitterName(); FetchEmitKeyIndices fetchEmitKeyIndices = null; diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java index 6d3ceb6c28..3e190a091c 100644 --- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java +++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java @@ -45,7 +45,7 @@ public class JsonPipesIterator extends PipesIterator implements Initializable { private Path jsonPath; @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { try (BufferedReader reader = Files.newBufferedReader(jsonPath, StandardCharsets.UTF_8)) { String line = reader.readLine(); while (line != null) { diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-kafka/src/main/java/org/apache/tika/pipes/pipesiterator/kafka/KafkaPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-kafka/src/main/java/org/apache/tika/pipes/pipesiterator/kafka/KafkaPipesIterator.java index 9fbebcfdaf..4589c97de0 100644 --- 
a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-kafka/src/main/java/org/apache/tika/pipes/pipesiterator/kafka/KafkaPipesIterator.java +++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-kafka/src/main/java/org/apache/tika/pipes/pipesiterator/kafka/KafkaPipesIterator.java @@ -147,7 +147,7 @@ public void checkInitialization(InitializableProblemHandler problemHandler) thro } @Override - protected void enqueue() throws InterruptedException, TimeoutException { + public void enqueue() throws InterruptedException, TimeoutException { String fetcherName = getFetcherName(); String emitterName = getEmitterName(); long start = System.currentTimeMillis(); diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java index 38fc1889cf..e11ff614de 100644 --- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java +++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java @@ -181,7 +181,7 @@ public void checkInitialization(InitializableProblemHandler problemHandler) thro } @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { String fetcherName = getFetcherName(); String emitterName = getEmitterName(); long start = System.currentTimeMillis(); diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/pipesiterator/solr/SolrPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/pipesiterator/solr/SolrPipesIterator.java index 9ecead289b..a02a1d4e0d 100644 --- 
a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/pipesiterator/solr/SolrPipesIterator.java +++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/pipesiterator/solr/SolrPipesIterator.java @@ -170,7 +170,7 @@ public void setProxyPort(int proxyPort) { } @Override - protected void enqueue() throws InterruptedException, IOException, TimeoutException { + public void enqueue() throws InterruptedException, IOException, TimeoutException { String fetcherName = getFetcherName(); String emitterName = getEmitterName(); diff --git a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/main/java/org/apache/tika/pipes/reporters/fs/FileSystemStatusReporter.java b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/main/java/org/apache/tika/pipes/reporters/fs/FileSystemStatusReporter.java index b48745a6c6..6b456182ce 100644 --- a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/main/java/org/apache/tika/pipes/reporters/fs/FileSystemStatusReporter.java +++ b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/main/java/org/apache/tika/pipes/reporters/fs/FileSystemStatusReporter.java @@ -40,10 +40,10 @@ import org.apache.tika.config.Param; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.FetchEmitTuple; -import org.apache.tika.pipes.PipesReporter; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.async.AsyncStatus; import org.apache.tika.pipes.pipesiterator.TotalCountResult; +import org.apache.tika.pipes.reporter.PipesReporter; import org.apache.tika.utils.ExceptionUtils; /** diff --git a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/test/java/org/apache/tika/pipes/reporters/fs/TestFileSystemStatusReporter.java b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/test/java/org/apache/tika/pipes/reporters/fs/TestFileSystemStatusReporter.java index 
16296fa1cf..42684e5775 100644 --- a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/test/java/org/apache/tika/pipes/reporters/fs/TestFileSystemStatusReporter.java +++ b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-fs-status/src/test/java/org/apache/tika/pipes/reporters/fs/TestFileSystemStatusReporter.java @@ -40,11 +40,11 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.apache.tika.pipes.PipesReporter; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.async.AsyncStatus; import org.apache.tika.pipes.pipesiterator.PipesIterator; import org.apache.tika.pipes.pipesiterator.TotalCountResult; +import org.apache.tika.pipes.reporter.PipesReporter; public class TestFileSystemStatusReporter { diff --git a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/main/java/org/apache/tika/pipes/reporters/jdbc/JDBCPipesReporter.java b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/main/java/org/apache/tika/pipes/reporters/jdbc/JDBCPipesReporter.java index e31c0dc5a2..0c4f671db8 100644 --- a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/main/java/org/apache/tika/pipes/reporters/jdbc/JDBCPipesReporter.java +++ b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/main/java/org/apache/tika/pipes/reporters/jdbc/JDBCPipesReporter.java @@ -43,8 +43,8 @@ import org.apache.tika.config.Param; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.pipes.FetchEmitTuple; -import org.apache.tika.pipes.PipesReporterBase; import org.apache.tika.pipes.PipesResult; +import org.apache.tika.pipes.reporter.PipesReporterBase; import org.apache.tika.utils.StringUtils; /** diff --git a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/java/org/apache/tika/pipes/reporters/jdbc/TestJDBCPipesReporter.java 
b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/java/org/apache/tika/pipes/reporters/jdbc/TestJDBCPipesReporter.java index 01d903c5ef..54dab5cfe4 100644 --- a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/java/org/apache/tika/pipes/reporters/jdbc/TestJDBCPipesReporter.java +++ b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/java/org/apache/tika/pipes/reporters/jdbc/TestJDBCPipesReporter.java @@ -49,12 +49,12 @@ import org.junit.jupiter.api.io.TempDir; import org.apache.tika.pipes.FetchEmitTuple; -import org.apache.tika.pipes.PipesReporter; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.async.AsyncConfig; import org.apache.tika.pipes.emitter.EmitKey; import org.apache.tika.pipes.fetcher.FetchKey; import org.apache.tika.pipes.pipesiterator.TotalCountResult; +import org.apache.tika.pipes.reporter.PipesReporter; public class TestJDBCPipesReporter { diff --git a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-opensearch/src/main/java/org/apache/tika/pipes/reporters/opensearch/OpenSearchPipesReporter.java b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-opensearch/src/main/java/org/apache/tika/pipes/reporters/opensearch/OpenSearchPipesReporter.java index 7dbe136218..fb2aa279cb 100644 --- a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-opensearch/src/main/java/org/apache/tika/pipes/reporters/opensearch/OpenSearchPipesReporter.java +++ b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-opensearch/src/main/java/org/apache/tika/pipes/reporters/opensearch/OpenSearchPipesReporter.java @@ -37,8 +37,8 @@ import org.apache.tika.metadata.ExternalProcess; import org.apache.tika.metadata.Metadata; import org.apache.tika.pipes.FetchEmitTuple; -import org.apache.tika.pipes.PipesReporter; import org.apache.tika.pipes.PipesResult; +import org.apache.tika.pipes.reporter.PipesReporter; import org.apache.tika.utils.StringUtils; /** diff --git 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java index 6c7cc97f76..4537a5ae29 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java @@ -206,7 +206,7 @@ private static ServerDetails initServer(TikaServerConfig tikaServerConfig) throw FetcherManager fetcherManager = null; InputStreamFactory inputStreamFactory = null; if (tikaServerConfig.isEnableUnsecureFeatures()) { - fetcherManager = FetcherManager.load(tikaServerConfig.getConfigPath()); + fetcherManager = new FetcherManager(); inputStreamFactory = new FetcherStreamFactory(fetcherManager); } else { inputStreamFactory = new DefaultInputStreamFactory(); diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java index 47219afa4e..235ad21815 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java @@ -34,10 +34,11 @@ import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.pf4j.PluginManager; -import org.apache.tika.exception.TikaConfigException; import org.apache.tika.io.TikaInputStream; import org.apache.tika.pipes.fetcher.FetcherManager; +import org.apache.tika.pipes.plugin.TikaPluginManager; import org.apache.tika.server.core.resource.TikaResource; import org.apache.tika.server.core.writer.JSONMessageBodyWriter; @@ -81,10 +82,16 @@ protected InputStream getTikaConfigInputStream() throws IOException { @Override protected 
InputStreamFactory getInputStreamFactory(InputStream is) { + System.setProperty("pf4j.mode", "development"); // Development mode lets you work from source dir easier. + Path fetchersPath = Path.of("..", ".."); + PluginManager pluginManager = new TikaPluginManager(fetchersPath); + pluginManager.loadPlugins(); + pluginManager.startPlugins(); + FetcherManager fetcherManager = FetcherManager.load(pluginManager); + try (TikaInputStream tis = TikaInputStream.get(is)) { - FetcherManager fetcherManager = FetcherManager.load(tis.getPath()); return new FetcherStreamFactory(fetcherManager); - } catch (IOException | TikaConfigException e) { + } catch (IOException e) { throw new RuntimeException(e); } }