Skip to content

Commit 72b52fe

Browse files
committed
simplify AWS setup
1 parent 4e05e28 commit 72b52fe

27 files changed

+201
-449
lines changed

.github/workflows/build.yml

Lines changed: 0 additions & 72 deletions
This file was deleted.

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,8 @@ node_modules
1919
/kiwix-tools
2020

2121
bin/zimdump
22+
23+
*.tfstate
24+
*.tfstate.*
25+
*.terraform
26+
*.terraform.*

Dockerfile

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,34 @@
1-
# This Dockerfile creates a self-contained image in which mirrorzim.sh can be executed
1+
# This Dockerfile creates a self-contained image in which mirrorzim.sh can be executed.
2+
# It also runs the IPFS daemon.
23
#
34
# You can build the image as follows (remember to use this repo as context for the build):
4-
# docker build . -f Dockerfile -t distributed-wikipedia-mirror
5+
# docker build . --platform=linux/amd64 -f Dockerfile -t distributed-wikipedia-mirror
56
#
6-
# You can then run the container anywhere as follows
7-
# docker run --rm -v $(pwd)/snapshots:/github/workspace/snapshots -v $(pwd)/tmp:/github/workspace/tmp distributed-wikipedia-mirror <mirrorzim.sh arguments>
8-
# NOTE(s):
9-
# - volume attached at /github/workspace/snapshots will contain downloaded zim files after the run
10-
# - volume attached at /github/workspace/tmp will contain created website directories after the run
7+
# You can then run the container anywhere as follows:
8+
# docker run --ulimit nofile=65536:65536 -p 4001:4001/tcp -p 4001:4001/udp distributed-wikipedia-mirror <mirrorzim_arguments>
119

12-
FROM openzim/zim-tools:3.1.0 AS openzim
10+
FROM stedolan/jq:latest AS jq
11+
FROM openzim/zim-tools:3.1.0 AS zimdump
12+
FROM ipfs/go-ipfs:v0.12.0 AS ipfs
13+
FROM node:16
1314

14-
FROM node:16.14.0-buster-slim
15+
RUN apt-get update && apt-get install --no-install-recommends --assume-yes rsync moreutils
1516

16-
RUN apt update && apt upgrade && apt install -y curl wget rsync
17+
COPY --from=jq /usr/local/bin/jq /usr/local/bin/
18+
COPY --from=zimdump /usr/local/bin/zimdump /usr/local/bin/
19+
COPY --from=ipfs /usr/local/bin/ipfs /usr/local/bin/
1720

18-
COPY --from=openzim /usr/local/bin/zimdump /usr/local/bin
21+
COPY assets /root/assets
22+
COPY bin /root/bin
23+
COPY src /root/src
24+
COPY tools /root/tools
25+
COPY mirrorzim.sh package.json tsconfig.json /root/
1926

20-
COPY tools/docker_entrypoint.sh /usr/local/bin
27+
RUN mkdir /root/snapshots /root/tmp
28+
RUN cd /root && yarn
2129

22-
RUN mkdir -p /github/distributed-wikipedia-mirror
23-
RUN mkdir -p /github/distributed-wikipedia-mirror/snapshots
24-
RUN mkdir -p /github/distributed-wikipedia-mirror/tmp
25-
RUN mkdir -p /github/workspace
30+
EXPOSE 4001/tcp
31+
EXPOSE 4001/udp
2632

27-
COPY . /github/distributed-wikipedia-mirror
28-
29-
RUN cd /github/distributed-wikipedia-mirror && yarn
30-
31-
VOLUME [ "/github/workspace" ]
32-
33-
WORKDIR /github/distributed-wikipedia-mirror
34-
ENTRYPOINT [ "docker_entrypoint.sh" ]
33+
WORKDIR /root
34+
ENTRYPOINT [ "tools/entrypoint.sh" ]

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -254,21 +254,25 @@ $ ./mirrorzim.sh --languagecode=cu --wikitype=wikipedia --hostingdnsdomain=cu.wi
254254
## Docker build
255255

256256
A `Dockerfile` with all the software requirements is provided.
257-
For now it is only a handy container for running the process on non-Linux
258-
systems or if you don't want to pollute your system with all the dependencies.
259-
In the future it will be end-to-end blackbox that takes ZIM and spits out CID
260-
and repo.
257+
It is an end-to-end black box that takes `mirrorzim.sh` arguments, prints the resulting CID,
258+
and runs the IPFS daemon.
261259

262-
To build the docker image:
260+
To run the publicly available docker image:
263261

264262
```sh
265-
docker build . -t distributed-wikipedia-mirror-build
263+
docker run --ulimit nofile=65536:65536 -p 4001:4001/tcp -p 4001:4001/udp public.ecr.aws/c4h1q7d1/distributed-wikipedia-mirror:latest <mirrorzim_arguments>
266264
```
267265

268-
To use it as a development environment:
266+
Alternatively, to build the docker image:
269267

270268
```sh
271-
docker run -it -v $(pwd):/root/distributed-wikipedia-mirror --net=host --entrypoint bash distributed-wikipedia-mirror-build
269+
docker build . --platform=linux/amd64 -f Dockerfile -t distributed-wikipedia-mirror
270+
```
271+
272+
And then, to run it:
273+
274+
```sh
275+
docker run --ulimit nofile=65536:65536 -p 4001:4001/tcp -p 4001:4001/udp distributed-wikipedia-mirror <mirrorzim_arguments>
272276
```
273277

274278
# How to Help
@@ -340,7 +344,3 @@ We are working on improving deduplication between snapshots, but for now YMMV.
340344
## Code
341345

342346
If you would like to contribute more to this effort, look at the [issues](https://github.com/ipfs/distributed-wikipedia-mirror/issues) in this github repo. Especially check for [issues marked with the "wishlist" label](https://github.com/ipfs/distributed-wikipedia-mirror/labels/wishlist) and issues marked ["help wanted"](https://github.com/ipfs/distributed-wikipedia-mirror/labels/help%20wanted).
343-
344-
## GitHub Actions Workflow
345-
346-
The GitHub Actions workflow that is available in this repository takes information about the wiki website that you want to mirror, downloads its ZIM file, unpacks it, converts it to a website and uploads it to S3 as a tar.gz package, which is publicly accessible.

action.yml

Lines changed: 0 additions & 50 deletions
This file was deleted.

mirrorzim.sh

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ usage() {
1616
echo " [--hostingdnsdomain=<HOSTING_DNS_DOMAIN>]"
1717
echo " [--hostingipnshash=<HOSTING_IPNS_HASH>]"
1818
echo " [--mainpageversion=<MAIN_PAGE_VERSION>]"
19-
echo " [--push=<true|false>]"
2019
echo ""
2120
echo "OPTIONS"
2221
echo ""
@@ -28,7 +27,6 @@ usage() {
2827
echo " -d, --hostingdnsdomain string - the DNS domain name the mirror will be hosted at e.g. tr.wikipedia-on-ipfs.org"
2928
echo " -i, --hostingipnshash string - the IPNS hash the mirror will be hosted at e.g. QmVH1VzGBydSfmNG7rmdDjAeBZ71UVeEahVbNpFQtwZK8W"
3029
echo " -v, --mainpageversion string - an override hack used on Turkish Wikipedia, it sets the main page version as there are issues with the Kiwix version id"
31-
echo " -p, --push boolean - push to local ipfs instance (defaults to true)"
3230
exit 2
3331
}
3432

@@ -68,10 +66,6 @@ case $i in
6866
MAIN_PAGE_VERSION="${i#*=}"
6967
shift
7068
;;
71-
-p=*|--push=*)
72-
PUSH="${i#*=}"
73-
shift
74-
;;
7569
--default)
7670
DEFAULT=YES
7771
shift
@@ -116,10 +110,6 @@ if [ -z ${MAIN_PAGE_VERSION+x} ]; then
116110
MAIN_PAGE_VERSION=""
117111
fi
118112

119-
if [ -z ${PUSH+x} ]; then
120-
PUSH="true"
121-
fi
122-
123113
printf "\nEnsure zimdump is present...\n"
124114
PATH=$PATH:$(realpath ./bin)
125115
which zimdump &> /dev/null || (curl --progress-bar -L https://download.openzim.org/release/zim-tools/zim-tools_linux-x86_64-3.0.0.tar.gz | tar -xvz --strip-components=1 -C ./bin zim-tools_linux-x86_64-3.0.0/zimdump && chmod +x ./bin/zimdump)
@@ -154,11 +144,14 @@ node ./bin/run $TMP_DIRECTORY \
154144
${HOSTING_IPNS_HASH:+--hostingipnshash=$HOSTING_IPNS_HASH} \
155145
${MAIN_PAGE_VERSION:+--mainpageversion=$MAIN_PAGE_VERSION}
156146

157-
if [[ "$PUSH" == "true" ]]; then
158-
./tools/add_website_to_ipfs.sh "$ZIM_FILE" "$TMP_DIRECTORY" "-p"
159-
else
160-
printf "\n\n-------------------------\nD O N E !\n-------------------------\n"
161-
printf "ZIM: $ZIM_FILE\n"
162-
printf "TMP: $TMP_DIRECTORY"
163-
printf "\n-------------------------\n"
164-
fi
147+
printf "\nAdding the processed tmp directory to IPFS\n(this part may take long time on a slow disk):\n"
148+
CID=$(ipfs add -r --cid-version 1 --pin=false --offline -Q -p $TMP_DIRECTORY)
149+
MFS_DIR="/${ZIM_FILE}__$(date +%F_%T)"
150+
151+
# pin by adding to MFS under a meaningful name
152+
ipfs files cp /ipfs/$CID "$MFS_DIR"
153+
154+
printf "\n\n-------------------------\nD O N E !\n-------------------------\n"
155+
printf "MFS: $MFS_DIR\n"
156+
printf "CID: $CID"
157+
printf "\n-------------------------\n"

packer/README.md

Lines changed: 0 additions & 3 deletions
This file was deleted.

packer/provisioner.sh

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments
 (0)