Skip to content

Commit a79b365

Browse files
authored
feat: add ubuntu setup script (#279)
1 parent 9062d25 commit a79b365

File tree

5 files changed

+193
-28
lines changed

5 files changed

+193
-28
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
## 0.4.16-dev0
1+
## 0.4.16-dev1
22

33
### Enhancements
44

55
* Fallback to using file extensions for filetype detection if `libmagic` is not present
66

7+
### Features
8+
9+
* Added setup script for Ubuntu
10+
711
## 0.4.15
812

913
### Enhancements

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ check-scripts:
130130
.PHONY: check-version
131131
check-version:
132132
# Fail if syncing version would produce changes
133-
scripts/version-sync.sh -c
133+
scripts/version-sync.sh -c -f "unstructured/__version__.py" semver
134134

135135
## tidy: run black
136136
.PHONY: tidy
@@ -141,7 +141,7 @@ tidy:
141141
## version-sync: update __version__.py with most recent version from CHANGELOG.md
142142
.PHONY: version-sync
143143
version-sync:
144-
scripts/version-sync.sh
144+
scripts/version-sync.sh -f "unstructured/__version__.py" semver
145145

146146
.PHONY: check-coverage
147147
check-coverage:

scripts/setup_ubuntu.sh

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/bin/bash
# Install the system-level dependencies of unstructured on an Ubuntu host:
# git, a pyenv-managed Python 3.8.15, and the OpenCV, Poppler and Tesseract
# packages.
#
# Usage: setup_ubuntu.sh USER_ACCOUNT
#   USER_ACCOUNT  name of the account that receives pyenv and whose .bashrc
#                 gets the pyenv initialization lines.
#
# Package commands are prefixed with sudo unless the script already runs as
# root. Per-user steps run as USER_ACCOUNT through `sudo -u`.
set +u -e

if [ -z "${1:-}" ]; then
  echo "When running this script, please supply the name of the user account for which to set up unstructured dependencies."
  echo "Ex: ${0} abertl"
  exit 1
fi

set -ux

# Set user account for which we're configuring the tools
USER_ACCOUNT=$1

# Look the home directory up instead of assuming /home/<user>, which is wrong
# for root and for accounts created with a custom home.
USER_HOME=$(getent passwd "$USER_ACCOUNT" | cut -d: -f6)
if [[ -z "$USER_HOME" ]]; then
  echo "Unable to determine the home directory of user account: ${USER_ACCOUNT}" >&2
  exit 1
fi
USER_BASHRC="$USER_HOME/.bashrc"

# Set package manager command for this distribution
pac="apt"

# If we're not running as root, we want to prefix certain commands with sudo
if [[ $(whoami) == 'root' ]]; then
  sudo=''
else
  sudo='sudo'
fi

# Temporary needrestart override written below. It is removed by an EXIT trap
# so that a failure part-way through (set -e) cannot leave it behind.
NEEDRESTART_DIR=/etc/needrestart/conf.d
NEEDRESTART_OVERRIDE="$NEEDRESTART_DIR/99z_temp_disable.conf"

# Put needrestart back the way it was.
cleanup() {
  if [[ -d "$NEEDRESTART_DIR" ]]; then
    $sudo rm -f "$NEEDRESTART_OVERRIDE"
  fi
}
trap cleanup EXIT

# Update existing packages
# Reconfigure the service that detects the need for service restarts from interactive mode (user
# needs to manually confirm which services to restart) to automatic. If we don't do this we'll
# get hung up on a screen asking us which services we want to restart after upgrading packages.
$sudo $pac update -y
if [[ -d "$NEEDRESTART_DIR" ]]; then
  # shellcheck disable=SC2016
  echo '$nrconf{restart} = '"'a';" | $sudo tee "$NEEDRESTART_OVERRIDE"
fi
$sudo $pac upgrade -y

#### Git
# Install git
$sudo $pac install -y git

#### Python
# Install tools needed to build python
$sudo $pac install -y curl gcc bzip2 sqlite zlib1g-dev libreadline-dev libsqlite3-dev libssl-dev tk-dev libffi-dev xz-utils make build-essential libbz2-dev wget llvm libncursesw5-dev libxml2-dev libxmlsec1-dev liblzma-dev
# Install pyenv (skipped entirely when the account already has a ~/.pyenv)
if [[ ! -d "$USER_HOME/.pyenv" ]]; then
  sudo -u "$USER_ACCOUNT" -i <<'EOF'
    cd $HOME
    curl https://pyenv.run | bash
EOF
  # The .bashrc edits run as the target account so they succeed no matter who
  # invoked the script, and so the file keeps its owner.
  sudo -u "$USER_ACCOUNT" touch "$USER_BASHRC"
  # Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$USER_BASHRC"
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$USER_BASHRC"
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '/eval "$(pyenv init -)"/d' "$USER_BASHRC"
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$USER_BASHRC"
  # Add initialization lines to .bashrc. Each one is inserted at line 1, so
  # they are written in reverse to end up in the order PYENV_ROOT, PATH,
  # pyenv init, virtualenv-init at the very top of the file.
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '1ieval "$(pyenv virtualenv-init -)"' "$USER_BASHRC"
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '1ieval "$(pyenv init -)"' "$USER_BASHRC"
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '1icommand -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' "$USER_BASHRC"
  # shellcheck disable=SC2016
  sudo -u "$USER_ACCOUNT" sed -i '1iexport PYENV_ROOT="$HOME/.pyenv"' "$USER_BASHRC"
  # install python
  sudo -u "$USER_ACCOUNT" -i <<'EOF'
    pyenv install 3.8.15
EOF
fi

#### OpenCV dependencies
$sudo $pac install -y libgl1

#### Poppler
# Install poppler
$sudo $pac install -y poppler-utils

#### Tesseract
# Install tesseract as well as Russian language
$sudo $pac install -y tesseract-ocr libtesseract-dev tesseract-ocr-rus

#### Clean up
# The temporary needrestart override is removed by the EXIT trap installed above.

scripts/version-sync.sh

Lines changed: 99 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,110 @@
11
#!/bin/bash
2+
# Print the command-line help for this script on standard output.
# The first line names the script using the basename of $0.
usage() {
  local script_name
  script_name=$(basename "$0")
  printf '%s\n' \
    "Usage: ${script_name} [-c] -f FILE_TO_CHANGE REPLACEMENT_FORMAT [-f FILE_TO_CHANGE REPLACEMENT_FORMAT ...]" \
    'Synchronize files to latest version in source file' \
    ' -s Specifies source file for version (default is CHANGELOG.md)' \
    ' -f Specifies a file to change and the format for searching and replacing versions' \
    ' FILE_TO_CHANGE is the file to be updated/checked for updates' \
    ' REPLACEMENT_FORMAT is one of (semver, release, api-release)' \
    ' semver indicates to look for a full semver version and replace with the latest full version' \
    ' release indicates to look for a release semver version (x.x.x) and replace with the latest release version' \
    ' api-release indicates to look for a release semver version in the context of an api route and replace with the latest release version' \
    ' -c Compare versions and output proposed changes without changing anything.'
}
214

3-
CHECK=0
4-
while getopts ":c" opt; do
15+
# Collect the additional values that follow an option getopts has just parsed.
#
# getopts stores only the first value of an option in OPTARG. This helper
# appends each following positional parameter that does not start with '-'
# to OPTARG as array elements 1, 2, ... and advances OPTIND past them, so the
# next getopts call resumes at the next option.
#
# Arguments: the same positional parameters that getopts is parsing ("$@").
# Globals:   OPTIND (read and advanced), OPTARG (elements appended).
function getopts-extra () {
  declare -i i=1
  # if the next argument is not an option, then append it to array OPTARG
  while [[ ${OPTIND} -le $# && ${!OPTIND:0:1} != '-' ]]; do
    OPTARG[i]=${!OPTIND}
    # Use explicit arithmetic: on a variable without the integer attribute,
    # `+=` appends text ("4" would become "41"), which would end option
    # parsing early and drop every value and option that follows.
    i=$((i + 1))
    OPTIND=$((OPTIND + 1))
  done
}
24+
25+
# Parse input options
26+
declare CHECK=0
27+
declare SOURCE_FILE="CHANGELOG.md"
28+
declare -a FILES_TO_CHECK=()
29+
declare -a REPLACEMENT_FORMATS=()
30+
declare args
31+
declare OPTIND OPTARG opt
32+
while getopts ":hcs:f:" opt; do
533
case $opt in
34+
h)
35+
usage
36+
exit 0
37+
;;
638
c)
739
CHECK=1
840
;;
41+
s)
42+
SOURCE_FILE="$OPTARG"
43+
;;
44+
f)
45+
getopts-extra "$@"
46+
args=( "${OPTARG[@]}" )
47+
# validate length of args, should be 2
48+
if [ ${#args[@]} -eq 2 ]; then
49+
FILES_TO_CHECK+=( "${args[0]}" )
50+
REPLACEMENT_FORMATS+=( "${args[1]}" )
51+
else
52+
echo "Exactly 2 arguments must follow -f option." >&2
53+
exit 1
54+
fi
55+
;;
956
\?)
10-
echo "Invalid option: -$OPTARG. Use -c to show changes without applying, use no options to apply changes." >&2
57+
echo "Invalid option: -$OPTARG." >&2
58+
usage
1159
exit 1
1260
;;
1361
esac
1462
done
1563

16-
# Version appearing earliest in CHANGELOGFILE will be used as ground truth.
17-
CHANGELOGFILE="CHANGELOG.md"
18-
VERSIONFILE="unstructured/__version__.py"
19-
RE_SEMVER_FULL="(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-((0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?"
20-
# Pull out semver appearing earliest in CHANGELOGFILE.
21-
LAST_VERSION=$(grep -o -m 1 -E "${RE_SEMVER_FULL}" "$CHANGELOGFILE")
64+
# Parse REPLACEMENT_FORMATS
65+
RE_SEMVER_FULL='(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?'
66+
RE_RELEASE="(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)"
67+
RE_API_RELEASE="v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)"
68+
# Pull out semver appearing earliest in SOURCE_FILE.
69+
LAST_VERSION=$(grep -o -m 1 -E "${RE_SEMVER_FULL}" "$SOURCE_FILE")
70+
LAST_RELEASE=$(grep -o -m 1 -E "${RE_RELEASE}($|[^-+])$" "$SOURCE_FILE" | grep -o -m 1 -E "${RE_RELEASE}")
71+
LAST_API_RELEASE="v$(grep -o -m 1 -E "${RE_RELEASE}($|[^-+])$" "$SOURCE_FILE" | grep -o -m 1 -E "${RE_RELEASE}")"
72+
declare -a RE_SEMVERS=()
73+
declare -a UPDATED_VERSIONS=()
74+
for i in "${!REPLACEMENT_FORMATS[@]}"; do
75+
REPLACEMENT_FORMAT=${REPLACEMENT_FORMATS[$i]}
76+
case $REPLACEMENT_FORMAT in
77+
semver)
78+
RE_SEMVERS+=( "$RE_SEMVER_FULL" )
79+
UPDATED_VERSIONS+=( "$LAST_VERSION" )
80+
;;
81+
release)
82+
RE_SEMVERS+=( "$RE_RELEASE" )
83+
UPDATED_VERSIONS+=( "$LAST_RELEASE" )
84+
;;
85+
api-release)
86+
RE_SEMVERS+=( "$RE_API_RELEASE" )
87+
UPDATED_VERSIONS+=( "$LAST_API_RELEASE" )
88+
;;
89+
*)
90+
echo "Invalid replacement format: \"${REPLACEMENT_FORMAT}\". Use semver, release, or api-release" >&2
91+
exit 1
92+
;;
93+
esac
94+
done
2295

2396
if [ -z "$LAST_VERSION" ];
2497
then
25-
# No match to semver regex in CHANGELOGFILE, so no version to go from.
26-
printf "Error: Unable to find latest version from %s.\n" "$CHANGELOGFILE"
98+
# No match to semver regex in SOURCE_FILE, so no version to go from.
99+
printf "Error: Unable to find latest version from %s.\n" "$SOURCE_FILE"
27100
exit 1
28101
fi
29102

30-
# Add files to this array that need to be kept in sync.
31-
FILES_TO_CHANGE=("$VERSIONFILE")
32-
# Add patterns to this array to be matched in the above files.
33-
RE_SEMVERS=("$RE_SEMVER_FULL")
34-
# Add versions to this array to be used as replacements for the patterns matched above from the corresponding files.
35-
UPDATED_VERSIONS=("$LAST_VERSION")
103+
# Search files in FILES_TO_CHECK and change (or get diffs)
104+
declare FAILED_CHECK=0
36105

37-
for i in "${!FILES_TO_CHANGE[@]}"; do
38-
FILE_TO_CHANGE=${FILES_TO_CHANGE[$i]}
106+
for i in "${!FILES_TO_CHECK[@]}"; do
107+
FILE_TO_CHANGE=${FILES_TO_CHECK[$i]}
39108
RE_SEMVER=${RE_SEMVERS[$i]}
40109
UPDATED_VERSION=${UPDATED_VERSIONS[$i]}
41110
FILE_VERSION=$(grep -o -m 1 -E "${RE_SEMVER}" "$FILE_TO_CHANGE")
@@ -45,7 +114,7 @@ for i in "${!FILES_TO_CHANGE[@]}"; do
45114
printf "Error: No semver version found in file %s.\n" "$FILE_TO_CHANGE"
46115
exit 1
47116
else
48-
# Replace semver in VERSIONFILE with semver obtained from CHANGELOGFILE
117+
# Replace semver in VERSIONFILE with semver obtained from SOURCE_FILE
49118
TMPFILE=$(mktemp /tmp/new_version.XXXXXX)
50119
# Check sed version, exit if version < 4.3
51120
if ! sed --version > /dev/null 2>&1; then
@@ -63,17 +132,23 @@ for i in "${!FILES_TO_CHANGE[@]}"; do
63132
DIFF=$(diff "$FILE_TO_CHANGE" "$TMPFILE" )
64133
if [ -z "$DIFF" ];
65134
then
66-
printf "version sync would make no changes.\n"
135+
printf "version sync would make no changes to %s.\n" "$FILE_TO_CHANGE"
67136
rm "$TMPFILE"
68-
exit 0
69137
else
70-
printf "version sync would make the following changes:\n%s\n" "$DIFF"
138+
FAILED_CHECK=1
139+
printf "version sync would make the following changes to %s:\n%s\n" "$FILE_TO_CHANGE" "$DIFF"
71140
rm "$TMPFILE"
72-
exit 1
73141
fi
74142
else
75143
cp "$TMPFILE" "$FILE_TO_CHANGE"
76144
rm "$TMPFILE"
77145
fi
78146
fi
79147
done
148+
149+
# Exit with code determined by whether changes were needed in a check.
150+
if [ ${FAILED_CHECK} -ne 0 ]; then
151+
exit 1
152+
else
153+
exit 0
154+
fi

unstructured/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.4.16-dev0" # pragma: no cover
1+
__version__ = "0.4.16-dev1" # pragma: no cover

0 commit comments

Comments
 (0)