Skip to content

Commit ef33864

Browse files
authored
New Script: Apache Tika (#2079)
* New Script: Apache Tika
* Temp: Replace github URLs to my own fork
* Add additional dependencies according to the Docker image installation
  See https://github.com/apache/tika-docker/blob/master/full/Dockerfile
* Apache Tika: Set correct tags
* Apache Tika: Set TODO to make it updateable
* Apache Tika: Fix "software-properties-common: command not found"
* Apache Tika: Automate version detection
* Apache Tika: Add `update_script`
* Apache Tika: Added clean up of `/opt/apache-tika/tika-server-standard-prev-version.jar` after upgrade
* Apache Tika: Bump up ram to 2048
* Apache Tika: Set updateable to true
* Apache Tika: Switch from `default-jdk` to `openjdk-17-jre-headless`
* Apache Tika: Removed comment about Docker file
* Apache Tika: Removed empty line
* Revert "Temp: Replace github URLs to my own fork"
  This reverts commit f1c5d87.
1 parent d7b6a97 commit ef33864

File tree

3 files changed

+181
-0
lines changed

3 files changed

+181
-0
lines changed

ct/apache-tika.sh

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env bash
source <(curl -s https://raw.githubusercontent.com/community-scripts/ProxmoxVE/main/misc/build.func)
# Copyright (c) 2021-2025 community-scripts ORG
# Author: Andy Grunwald (andygrunwald)
# License: MIT | https://github.com/community-scripts/ProxmoxVE/raw/main/LICENSE
# Source: https://github.com/apache/tika/

# Default settings for the Apache Tika container.
# NOTE(review): the var_* names are presumably read by the helpers sourced from
# build.func above; their exact semantics are not visible in this file.
APP="Apache-Tika"
var_tags="document"
var_cpu="1"
var_ram="2048"
var_disk="10"
var_os="debian"
var_version="12"
var_unprivileged="1"

# Print the application header and load the base settings
# (both helpers come from the sourced build.func).
header_info "$APP"
base_settings

# Core initialisation helpers, called in the same order as before.
variables
color
catch_errors
26+
27+
#######################################
# Prints the highest x.y.z version directory linked from the Apache Tika
# download index.
# Outputs: the version string on stdout; nothing when the index cannot be
#          fetched or contains no version links.
#######################################
function get_latest_tika_release() {
  wget -qO- https://dlcdn.apache.org/tika/ |
    grep -oP '(?<=href=")[0-9]+\.[0-9]+\.[0-9]+(?=/")' |
    sort -V |
    tail -n1
}

#######################################
# Updates an existing Apache Tika installation to the newest published release.
#
# Flow: verify the systemd unit exists, look up the latest release, and - when
# it differs from the version recorded in /opt/${APP}_version.txt - download the
# new server jar, stop the service, swap the jar, record the version, start the
# service again and remove the previous jar.
#
# Globals:   APP (read), RELEASE (written)
# Arguments: none
# Returns:   never returns; always terminates the shell via exit. Exits with
#            status 1 when the release lookup or the download fails.
#######################################
function update_script() {
  header_info
  check_container_storage
  check_container_resources
  if [[ ! -f /etc/systemd/system/apache-tika.service ]]; then
    msg_error "No ${APP} Installation Found!"
    exit
  fi

  # Abort before touching the running service when the release cannot be
  # determined; an empty RELEASE would otherwise look like "new version".
  if ! RELEASE="$(get_latest_tika_release)" || [[ -z "${RELEASE}" ]]; then
    msg_error "Unable to determine the latest ${APP} release"
    exit 1
  fi

  if [[ ! -f "/opt/${APP}_version.txt" ]] || [[ "${RELEASE}" != "$(cat "/opt/${APP}_version.txt")" ]]; then
    local new_jar="tika-server-standard-${RELEASE}.jar"

    # Download first, so a failed download never leaves the service stopped.
    msg_info "Downloading ${APP} v${RELEASE}"
    if ! cd /opt/apache-tika; then
      msg_error "Directory /opt/apache-tika not found"
      exit 1
    fi
    # -O pins the output name; without it wget would write "<name>.1" if a
    # leftover file from an interrupted run exists, and the stale file would
    # then be installed instead of the fresh download.
    if ! wget -q -O "${new_jar}" "https://dlcdn.apache.org/tika/${RELEASE}/${new_jar}"; then
      rm -f -- "${new_jar}"
      msg_error "Failed to download ${APP} v${RELEASE}"
      exit 1
    fi
    msg_ok "Downloaded ${APP} v${RELEASE}"

    msg_info "Stopping ${APP}"
    systemctl stop apache-tika
    msg_ok "Stopped ${APP}"

    msg_info "Updating ${APP} to v${RELEASE}"
    # Keep the old jar until the service has been started again.
    if [[ -f tika-server-standard.jar ]]; then
      mv --force tika-server-standard.jar tika-server-standard-prev-version.jar
    fi
    mv "${new_jar}" tika-server-standard.jar
    echo "${RELEASE}" >"/opt/${APP}_version.txt"
    msg_ok "Updated ${APP} to v${RELEASE}"

    msg_info "Starting ${APP}"
    systemctl start apache-tika
    msg_ok "Started ${APP}"
    msg_info "Cleaning Up"
    rm -rf /opt/apache-tika/tika-server-standard-prev-version.jar
    msg_ok "Cleanup Completed"
    msg_ok "Updated Successfully"
  else
    msg_ok "No update required. ${APP} is already at v${RELEASE}"
  fi
  exit
}
61+
62+
# Run the container build flow. These helpers are not defined in this file;
# presumably they come from the build.func sourced at the top.
start
build_container
description

# Final status output, including the URL of the Tika server on port 9998.
msg_ok "Completed Successfully!\n"
echo -e "${CREATING}${GN}${APP} setup has been successfully initialized!${CL}"
echo -e "${INFO}${YW} Access it using the following URL:${CL}"
echo -e "${TAB}${GATEWAY}${BGN}http://${IP}:9998${CL}"

install/apache-tika-install.sh

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env bash

# Copyright (c) 2021-2025 community-scripts ORG
# Author: Andy Grunwald (andygrunwald)
# License: MIT | https://github.com/community-scripts/ProxmoxVE/raw/main/LICENSE
# Source: https://github.com/apache/tika/

# Installs Apache Tika server inside the container: OS dependencies (OCR,
# GDAL, fonts), a headless Java 17 runtime, the latest tika-server-standard
# jar under /opt/apache-tika, and a systemd unit listening on port 9998.
# Requires FUNCTIONS_FILE_PATH to hold the helper functions used below.

source /dev/stdin <<< "$FUNCTIONS_FILE_PATH"
color
verb_ip6
catch_errors
setting_up_container
network_check
update_os

msg_info "Installing Dependencies"
$STD apt-get install -y \
  curl \
  sudo \
  mc \
  software-properties-common \
  gdal-bin \
  tesseract-ocr \
  tesseract-ocr-eng \
  tesseract-ocr-ita \
  tesseract-ocr-fra \
  tesseract-ocr-spa \
  tesseract-ocr-deu
# Pre-accept the Microsoft core fonts EULA so the installer below does not
# prompt. The echo is deliberately NOT prefixed with $STD.
# NOTE(review): $STD is defined by the sourced functions; if it silences
# output, prefixing the echo would leave debconf-set-selections with empty
# input and the EULA would never be accepted.
echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections
$STD apt-get install -y \
  xfonts-utils \
  fonts-freefont-ttf \
  fonts-liberation \
  ttf-mscorefonts-installer \
  cabextract
msg_ok "Installed Dependencies"

msg_info "Setup OpenJDK"
$STD apt-get install -y \
  openjdk-17-jre-headless
msg_ok "Setup OpenJDK"

msg_info "Installing Apache Tika"
mkdir -p /opt/apache-tika
cd /opt/apache-tika || exit 1
# Newest x.y.z directory linked from the download index. Fail loudly when it
# cannot be determined instead of requesting a malformed download URL.
if ! RELEASE="$(wget -qO- https://dlcdn.apache.org/tika/ | grep -oP '(?<=href=")[0-9]+\.[0-9]+\.[0-9]+(?=/")' | sort -V | tail -n1)" || [[ -z "${RELEASE}" ]]; then
  msg_error "Unable to determine the latest Apache Tika release"
  exit 1
fi
# -O pins the output file name, so a leftover file cannot divert the download
# to "<name>.1".
wget -q -O "tika-server-standard-${RELEASE}.jar" "https://dlcdn.apache.org/tika/${RELEASE}/tika-server-standard-${RELEASE}.jar"
mv "tika-server-standard-${RELEASE}.jar" tika-server-standard.jar
# Record the installed version in /opt.
echo "${RELEASE}" >"/opt/${APPLICATION}_version.txt"
msg_ok "Installed Apache Tika"

msg_info "Creating Service"
# Unquoted heredoc delimiter: \$MAINPID is escaped so the literal text
# "$MAINPID" ends up in the unit file.
cat <<EOF >/etc/systemd/system/apache-tika.service
[Unit]
Description=Apache Tika
Documentation=https://tika.apache.org/
After=syslog.target network.target

[Service]
User=root
Restart=always
Type=simple
ExecStart=java -jar /opt/apache-tika/tika-server-standard.jar --host 0.0.0.0 --port 9998
ExecReload=/bin/kill -HUP \$MAINPID

[Install]
WantedBy=multi-user.target
EOF
systemctl enable -q --now apache-tika
msg_ok "Created Service"

motd_ssh
customize

msg_info "Cleaning up"
$STD apt-get -y autoremove
$STD apt-get -y autoclean
msg_ok "Cleaned"

json/apache-tika.json

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"name": "Apache Tika",
3+
"slug": "apache-tika",
4+
"categories": [
5+
12
6+
],
7+
"date_created": "2025-02-05",
8+
"type": "ct",
9+
"updateable": true,
10+
"privileged": false,
11+
"interface_port": 9998,
12+
"documentation": null,
13+
"website": "https://tika.apache.org/",
14+
"logo": "https://tika.apache.org/tika.png",
15+
"description": "The Apache Tika™ toolkit detects and extracts metadata and text from over a thousand different file types (such as PPT, XLS, and PDF). All of these file types can be parsed through a single interface, making Tika useful for search engine indexing, content analysis, translation, and much more.",
16+
"install_methods": [
17+
{
18+
"type": "default",
19+
"script": "ct/apache-tika.sh",
20+
"resources": {
21+
"cpu": 1,
22+
"ram": 2048,
23+
"hdd": 10,
24+
"os": "debian",
25+
"version": "12"
26+
}
27+
}
28+
],
29+
"default_credentials": {
30+
"username": null,
31+
"password": null
32+
},
33+
"notes": []
34+
}

0 commit comments

Comments
 (0)