diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 117ec78ef..aa2c3291b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -12,6 +12,7 @@ on: pull_request: branches: - 'main' + - 'test-*' types: - opened - reopened @@ -41,24 +42,6 @@ jobs: set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; set -eux set_sl_var image "${image}" >> "${GITHUB_OUTPUT}" - # v0.14.1 = ghcr.io/meeb/tubesync:latest@sha256:63fffee3411bda395c526087d7107f76834141bb0c1443a0bd6bed5533e5a85f - tag_tried='v0.14.1' - tag_image_digest='latest@sha256:63fffee3411bda395c526087d7107f76834141bb0c1443a0bd6bed5533e5a85f' - docker pull "${image}:${tag_image_digest}" - docker pull "${image}:${tag_tried}" && tag_found=1 || { - tag_found=0 - missing="${missing}${missing:+|}${tag_tried}=${image}:${tag_image_digest}" - } - set_sl_var "${tag_tried}" "${tag_found}" >> "${GITHUB_OUTPUT}" - # v0.15.1 = ghcr.io/meeb/tubesync:latest@sha256:7303b2d8854aac15f94dbbfdd0ee66ca598ade1af6ac2d9e3d886c93ffa2d596 - tag_tried='v0.15.1' - tag_image_digest='latest@sha256:7303b2d8854aac15f94dbbfdd0ee66ca598ade1af6ac2d9e3d886c93ffa2d596' - docker pull "${image}:${tag_image_digest}" - docker pull "${image}:${tag_tried}" && tag_found=1 || { - tag_found=0 - missing="${missing}${missing:+|}${tag_tried}=${image}:${tag_image_digest}" - } - set_sl_var "${tag_tried}" "${tag_found}" >> "${GITHUB_OUTPUT}" set_sl_var missing "${missing}" >> "${GITHUB_OUTPUT}" - uses: actions/checkout@v4 - name: Lowercase github username @@ -132,6 +115,8 @@ jobs: pip install --system --strict --requirements requirements.txt - name: Set up Django environment run: | + mkdir -v -p ~/.config/TubeSync/config + sudo ln -v -s -f -T ~/.config/TubeSync/config /config cp -v -p tubesync/tubesync/local_settings.py.example tubesync/tubesync/local_settings.py cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/background_task/ patches/background_task/* cp -v -a -t "${Python3_ROOT_DIR}"/lib/python3.*/site-packages/yt_dlp/ patches/yt_dlp/* @@ -185,7 +170,94 @@ jobs: needs: ['info', 'test'] runs-on: ubuntu-latest timeout-minutes: 120 + services: + wormhole-mailbox: + image: 'ghcr.io/tcely/docker-magic-wormhole-mailbox-server:service' + ports: + - '4000:4000' + wormhole-transit: + image: 'ghcr.io/tcely/docker-magic-wormhole-transit-relay:main' + ports: + - '4001:4001' steps: + - name: Set environment variables with jq + run: | + # jq + cat >| .ffmpeg.releases.json <<'EOF' + ${{ needs.info.outputs.ffmpeg-releases }} + EOF + + FFMPEG_DATE='[foreach .[] as $release ([{}, []]; [ .[0] + { ($release.commit): ([ $release.date ] + (.[0][($release.commit)] // [])) }, [ .[1][0] // $release.commit ] ] ; .[0][(.[1][0])] )][-1][0]' ; + FFMPEG_VERSION='.[]|select(.date == $previous)|.versions[]|select(startswith("N-"))' ; + + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + { + for var in FFMPEG_DATE FFMPEG_VERSION + do + # jq_arg="$( eval printf -- "'%s\n'" "$(printf -- '"${%s}"' "${var}")" )" ; + jq_arg="$( eval printf -- "'%s\n'" '"${'"${var}"'}"' )" ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + jq -r --arg previous "${previous_value-}" "${jq_arg}" -- .ffmpeg.releases.json ; + close_ml_var "${delim}" "${var}" ; + previous_value="$( jq -r --arg previous "${previous_value-}" "${jq_arg}" -- .ffmpeg.releases.json )" ; + done ; + + unset -v delim jq_arg previous_value var ; 
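# Editor's aside, not part of the diff: the mk_delim/open_ml_var/close_ml_var
# helpers above emit the GITHUB_ENV multiline-value syntax, i.e. a
# "NAME<<DELIMITER" line, the value, then a line holding only DELIMITER.
# A minimal standalone sketch of the same idea, using a hypothetical
# variable named EXAMPLE_VAR:
#
#   delim="EXAMPLE_VAR_EOF_${RANDOM}"
#   {
#     printf -- '%s<<%s\n' 'EXAMPLE_VAR' "${delim}"
#     printf -- '%s\n' 'first line' 'second line'
#     printf -- '%s\n' "${delim}"
#   } >> "${GITHUB_ENV}"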
+ } >> "${GITHUB_ENV}" + rm -v -f .ffmpeg.releases.json + - name: Set environment variables with GitHub CLI + env: + GH_REPO: ${{ github.repository }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # gh api + GH_UPSTREAM_OWNER='.parent.owner.login' ; + GH_UPSTREAM_REPO='.parent.name' ; + GH_UPSTREAM_SLUG='.parent.full_name' ; + + mk_delim() { printf -- '"%s_EOF_%d_"' "$1" "${RANDOM}" ; } ; + open_ml_var() { local f=''\%'s<<'\%'s\n' ; printf -- "${f}" "$2" "$1" ; } ; + close_ml_var() { local f='%s\n' ; printf -- "${f}" "$1" ; } ; + { + for var in GH_UPSTREAM_OWNER # GH_UPSTREAM_REPO GH_UPSTREAM_SLUG + do + # jq_arg="$( eval printf -- "'%s\n'" "$(printf -- '"${%s}"' "${var}")" )" ; + jq_arg="$( eval printf -- "'%s\n'" '"${'"${var}"'}"' )" ; + delim="$(mk_delim "${var}")" ; + open_ml_var "${delim}" "${var}" ; + gh api "repos/${GITHUB_REPOSITORY}" --cache 1h --jq "${jq_arg}" ; + close_ml_var "${delim}" "${var}" ; + done ; + unset -v delim jq_arg var ; + } >> "${GITHUB_ENV}" + + # Delete the oldest unused cache entries + printf -- '%s\n' 'Deleting unused cache entries' ; + gh cache list --sort last_accessed_at --order asc --ref "${GITHUB_REF}" | \ + awk '$NF == $(NF-1) {print $1}' | \ + xargs -r -t -n 1 gh cache delete 2>&1 | \ + tee /dev/stderr | wc -l | xargs -n 1 printf -- 'Total deleted: %d\n' ; + - name: Upstream registry ref + id: upstream + run: | + user_lowercase="$(printf -- '%s\n' "${GH_UPSTREAM_OWNER}" | awk '{print tolower($0);}')" ; + repo_image="ghcr.io/${user_lowercase}/${IMAGE_NAME}" + set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; + set_sl_var >> "${GITHUB_OUTPUT}" \ + ref "${repo_image}:latest" \ + tag "${repo_image}:latest" ; + - name: Registry ref + id: origin + run: | + user_lowercase="$(printf -- '%s\n' "${GITHUB_ACTOR}" | awk '{print tolower($0);}')" ; + repo_image="ghcr.io/${user_lowercase}/${IMAGE_NAME}" + set_sl_var() { local f='%s=%s\n' ; printf -- "${f}" "$@" ; } ; + set_sl_var >> "${GITHUB_OUTPUT}" \ + ref "${repo_image}:cache" \ + tag "${repo_image}:latest" ; - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -198,7 +270,7 @@ jobs: DOCKER_TOKEN: ${{ 'meeb' == github.repository_owner && secrets.REGISTRY_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} run: echo "${DOCKER_TOKEN}" | docker login --password-stdin --username "${DOCKER_USERNAME}" "${DOCKER_REGISTRY}" - name: Push missing release tags - if: ${{ 'meeb' == github.actor && github.actor == github.repository_owner && '' != needs.info.outputs.missing-tags }} + if: ${{ false && 'meeb' == github.actor && github.actor == github.repository_owner && '' != needs.info.outputs.missing-tags }} run: | missing='${{ needs.info.outputs.missing-tags }}' image='${{ needs.info.outputs.missing-tags-image }}' @@ -212,15 +284,97 @@ jobs: docker push "${image}:${tag}" ; ) done + - name: Checkout + uses: actions/checkout@v4 + - name: Create cache directory on the runner + run: | + mkdir -v -p .cache + : >> .cache/reset + mkdir -v .cache/saved .cache/removed .cache/runner + - name: Cache from tubesync stage + id: cache + uses: actions/cache@v4 + with: + path: | + .cache/saved + .cache/runner + key: docker-cache-tubesync-${{ hashFiles('.cache/reset') }}-${{ hashFiles('*file', '.github/workflows/ci.yaml') }} + restore-keys: | + docker-cache-tubesync-${{ hashFiles('.cache/reset') }}- + - name: List cache directory on the runner + run: | + # limited listing when the cache was restored + ls -al .cache && + ls -al .cache/* && + ls -al .cache/*/* && + ls -al .cache/*/*/* || + ls -alR .cache + - 
name: Start magic-wormhole services on the runner + if: ${{ 'true' != steps.cache.outputs.cache-hit }} + id: wormhole + run: | + rm -rf .cache/runner/wormhole + sudo apt-get install python3-venv + venv_dir=".cache/runner/${RUNNER_ARCH}/wormhole" && + python3 -m venv --upgrade-deps "${venv_dir}" && + . "${venv_dir}"/bin/activate || exit + pip install 'magic-wormhole' + # determine the runner IP address + _awk_prog='$0 !~ /scope host/ && "inet" == $1 {split($2, P, "/"); print P[1]; exit;}' + runner_ip="$( ip addr sh | awk "${_awk_prog}" )" + # set variables + relay_arg="ws://${runner_ip}:4000/v1" + transit_arg="tcp:[${runner_ip}]:4001" + # generate the code and receive the first transfer + ( wormhole \ + --appid TubeSync \ + --relay-url "${relay_arg}" \ + --transit-helper "${transit_arg}" \ + receive -a -c 3 \ + --accept-file -o .cache/incoming >| .cache/receive.out 2>&1 && \ + mv --backup=numbered -f .cache/incoming/* .cache/saved/ || : ; \ + mv --backup=numbered -f .cache/saved/*.~[0-9]~ .cache/removed/ || : ; ) & + _pid=$!; sleep 1 && grep -e '^Allocated code:' .cache/receive.out | cut -d ' ' -f 3- >| .cache/.wormhole-code + cat -v -n .cache/receive.out + rm -v -f .cache/receive.out + code="$(< .cache/.wormhole-code)" + rm -v -f .cache/.wormhole-code + # create output variables + printf -- '%s=%s\n' >> "$GITHUB_OUTPUT" \ + code "${code}" \ + relay "${relay_arg}" \ + runner_ip "${runner_ip}" \ + transit "${transit_arg}" ; + # receive the saved directories + ( cd .cache && + while test -d /proc/"${_pid}" ; do sleep 5 ; done && + while { \ + wormhole \ + --appid TubeSync \ + --relay-url "${relay_arg}" \ + --transit-helper "${transit_arg}" \ + receive \ + --accept-file -o incoming "${code}" || : ; \ + } + do + mv --backup=numbered -f incoming/* saved/ || : ; + mv --backup=numbered -f saved/*.~[0-9]~ removed/ || : ; + rm -rf removed/* || : ; + done &) - name: Build image for `dive` id: build-dive-image uses: docker/build-push-action@v6 with: build-args: | + CI=${{ env.CI }} IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ needs.info.outputs.ffmpeg-date }} FFMPEG_VERSION=${{ needs.info.outputs.ffmpeg-version }} YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} + WORMHOLE_RELAY=${{ env.WORMHOLE_RELAY }} + WORMHOLE_TRANSIT=${{ env.WORMHOLE_TRANSIT }} + build-contexts: | + cache-tubesync=.cache/saved cache-from: type=gha load: true platforms: linux/amd64 @@ -238,21 +392,34 @@ jobs: --highestWastedBytes '50M' - name: Build and push id: build-push - timeout-minutes: 60 + timeout-minutes: 90 uses: docker/build-push-action@v6 + env: + WORMHOLE_CODE: ${{ steps.wormhole.outputs.code }} + WORMHOLE_RELAY: ${{ steps.wormhole.outputs.relay }} + WORMHOLE_TRANSIT: ${{ steps.wormhole.outputs.transit }} with: platforms: linux/amd64,linux/arm64 - push: ${{ 'success' == needs.test.result && 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'true' || 'false' }} - tags: ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:latest + push: ${{ 'success' == needs.test.result && 'pull_request' != github.event_name && 'true' || 'false' }} + provenance: false + tags: ${{ steps.origin.outputs.tag }} cache-from: | - type=registry,ref=ghcr.io/${{ needs.info.outputs.lowercase-github-actor }}/${{ env.IMAGE_NAME }}:latest - type=registry,ref=ghcr.io/${{ needs.info.outputs.lowercase-github-repository_owner }}/${{ env.IMAGE_NAME }}:latest type=gha + type=registry,ref=${{ steps.origin.outputs.ref }} + type=registry,ref=${{ steps.upstream.outputs.ref }} 
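# Editor's aside, not part of the diff: the build now draws from three cache
# sources: the GitHub Actions cache backend (type=gha) plus registry refs
# computed by the "origin" and "upstream" steps above
# (ghcr.io/<owner>/<image>:cache and the upstream :latest tag respectively),
# while cache-to only writes back to the gha backend.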
cache-to: | type=gha,mode=max - ${{ 'meeb' == github.repository_owner && 'pull_request' != github.event_name && 'type=inline' || '' }} build-args: | + CI=${{ env.CI }} IMAGE_NAME=${{ env.IMAGE_NAME }} FFMPEG_DATE=${{ needs.info.outputs.ffmpeg-date }} FFMPEG_VERSION=${{ needs.info.outputs.ffmpeg-version }} YTDLP_DATE=${{ fromJSON(needs.info.outputs.ytdlp-latest-release).tag.name }} + WORMHOLE_RELAY=${{ env.WORMHOLE_RELAY }} + WORMHOLE_TRANSIT=${{ env.WORMHOLE_TRANSIT }} + build-contexts: | + cache-tubesync=.cache/saved + secret-envs: | + WORMHOLE_CODE=WORMHOLE_CODE + WORMHOLE_RELAY=WORMHOLE_RELAY + WORMHOLE_TRANSIT=WORMHOLE_TRANSIT diff --git a/Dockerfile b/Dockerfile index da1e84c55..82901e227 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,7 @@ ARG SHA256_S6_ARM64="8b22a2eaca4bf0b27a43d36e65c89d2701738f628d1abd0cea5569619f6 ARG SHA256_S6_NOARCH="6dbcde158a3e78b9bb141d7bcb5ccb421e563523babbe2c64470e76f4fd02dae" ARG ALPINE_VERSION="latest" +ARG BUN_VERSION="1-slim" ARG DEBIAN_VERSION="bookworm-slim" ARG FFMPEG_PREFIX_FILE="ffmpeg-${FFMPEG_VERSION}" @@ -18,6 +19,19 @@ ARG FFMPEG_SUFFIX_FILE=".tar.xz" ARG FFMPEG_CHECKSUM_ALGORITHM="sha256" ARG S6_CHECKSUM_ALGORITHM="sha256" +ARG CACHE_PATH="/cache" + + +FROM alpine:${ALPINE_VERSION} AS populate-apt-cache-dirs +ARG TARGETARCH +RUN --mount=type=bind,from=cache-tubesync,target=/restored \ + set -ex ; \ + mkdir -v -p /apt-cache-cache /apt-lib-cache ; \ + # restore `apt` files + cp -at /apt-cache-cache/ /restored/apt-cache-cache/* || : ; \ + # to be careful, ensure that these files aren't from a different architecture + rm -v -f /apt-cache-cache/*cache.bin ; \ + cp -at /apt-lib-cache/ "/restored/${TARGETARCH}/apt-lib-cache"/* || : ; FROM debian:${DEBIAN_VERSION} AS tubesync-base @@ -25,6 +39,7 @@ ARG TARGETARCH ENV DEBIAN_FRONTEND="noninteractive" \ APT_KEEP_ARCHIVES=1 \ + EDITOR="editor" \ HOME="/root" \ LANGUAGE="en_US.UTF-8" \ LANG="en_US.UTF-8" \ @@ -34,8 +49,8 @@ ENV DEBIAN_FRONTEND="noninteractive" \ PIP_NO_COMPILE=1 \ PIP_ROOT_USER_ACTION='ignore' -RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt,source=/apt-lib-cache,from=populate-apt-cache-dirs \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt,source=/apt-cache-cache,from=populate-apt-cache-dirs \ # to be careful, ensure that these files aren't from a different architecture rm -f /var/cache/apt/*cache.bin ; \ # Update from the network and keep cache @@ -284,6 +299,32 @@ COPY --from=s6-overlay-extracted /s6-overlay-rootfs / FROM tubesync-base AS tubesync-uv COPY --from=uv-binaries /uv /uvx /usr/local/bin/ +FROM oven/bun:${BUN_VERSION} AS bun-base + +FROM debian:${DEBIAN_VERSION} AS bun +COPY --from=bun-base /usr/local/bin/bun /usr/local/bun/bin/bun +RUN mkdir -v -p /usr/local/bun/node-fallback/bin && \ + ln -v -T -s ../../bin/bun /usr/local/bun/node-fallback/bin/node && \ + ln -v -T -s bun /usr/local/bun/bin/bunx && \ + mkdir -v -p /usr/local/bin && \ + ln -v -T -s ../bun/bin/bun /usr/local/bin/bun && \ + ln -v -T -s bun /usr/local/bin/bunx && \ + ls -H -l /usr/local/bun/node-fallback/bin/node \ + /usr/local/bun/bin/bun /usr/local/bun/bin/bunx \ + /usr/local/bin/bun /usr/local/bin/bunx && \ + /usr/local/bun/bin/bun --version + +FROM denoland/deno:bin AS deno-binary + +FROM debian:${DEBIAN_VERSION} AS deno +COPY --from=deno-binary /deno 
/usr/local/bin/ + +FROM alpine:${ALPINE_VERSION} AS populate-uv-cache-dir +RUN --mount=type=bind,from=cache-tubesync,target=/restored \ + set -x ; \ + cp -at / '/restored/uv-cache' || \ + mkdir -v /uv-cache ; + FROM tubesync-base AS tubesync-openresty COPY --from=openresty-debian \ @@ -291,8 +332,8 @@ COPY --from=openresty-debian \ COPY --from=openresty-debian \ /etc/apt/sources.list.d/openresty.list /etc/apt/sources.list.d/openresty.list -RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt,source=/apt-lib-cache,from=populate-apt-cache-dirs \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt,source=/apt-cache-cache,from=populate-apt-cache-dirs \ set -x && \ apt-get update && \ apt-get -y --no-install-recommends install \ @@ -306,8 +347,8 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va FROM tubesync-base AS tubesync-nginx -RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt,source=/apt-lib-cache,from=populate-apt-cache-dirs \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt,source=/apt-cache-cache,from=populate-apt-cache-dirs \ set -x && \ apt-get update && \ apt-get -y --no-install-recommends install \ @@ -321,31 +362,31 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va apt-get -y autoclean && \ rm -v -f /var/cache/debconf/*.dat-old +# The preference for openresty over nginx, +# is for the newer version. 
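# Editor's aside, not part of the diff: the populate-apt-cache-dirs and
# populate-uv-cache-dir stages above exist so that BuildKit cache mounts can be
# seeded from previously restored files. On a RUN --mount=type=cache, the
# "from=<stage>" and "source=<path>" options base the cache mount on that
# stage's directory instead of an empty one, roughly:
#
#   RUN --mount=type=cache,target=/var/cache/apt,source=/apt-cache-cache,from=populate-apt-cache-dirs \
#       apt-get update
#
# (a sketch only; the real instructions in this Dockerfile also set id= and sharing=)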
FROM tubesync-openresty AS tubesync ARG S6_VERSION -ARG FFMPEG_DATE -ARG FFMPEG_VERSION - -ARG TARGETARCH +ARG FFMPEG_DATE FFMPEG_VERSION ENV S6_VERSION="${S6_VERSION}" \ FFMPEG_DATE="${FFMPEG_DATE}" \ - FFMPEG_VERSION="${FFMPEG_VERSION}" + FFMPEG_VERSION="${FFMPEG_VERSION}" \ + UV_LINK_MODE='copy' + +ARG TARGETARCH # Reminder: the SHELL handles all variables -RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt,source=/apt-lib-cache,from=populate-apt-cache-dirs \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt,source=/apt-cache-cache,from=populate-apt-cache-dirs \ set -x && \ apt-get update && \ # Install dependencies we keep # Install required distro packages apt-get -y --no-install-recommends install \ - libjpeg62-turbo \ libmariadb3 \ - libpq5 \ - libwebp7 \ + libonig5 \ pkgconf \ python3 \ python3-libsass \ @@ -353,9 +394,21 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va python3-socks \ curl \ less \ + lua-lpeg \ + tre-agrep \ + vis \ + xxd \ && \ # Link to the current python3 version ln -v -s -f -T "$(find /usr/local/lib -name 'python3.[0-9]*' -type d -printf '%P\n' | sort -r -V | head -n 1)" /usr/local/lib/python3 && \ + # Configure the editor alternatives + touch /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \ + update-alternatives --install /usr/bin/editor editor /usr/local/bin/babi 50 && \ + update-alternatives --install /usr/local/bin/nano nano /bin/nano 10 && \ + update-alternatives --install /usr/local/bin/nano nano /usr/local/bin/babi 20 && \ + update-alternatives --install /usr/local/bin/vim vim /usr/bin/vim.tiny 15 && \ + update-alternatives --install /usr/local/bin/vim vim /usr/bin/vis 35 && \ + rm -v /usr/local/bin/babi /bin/nano /usr/bin/vim.tiny && \ # Create a 'app' user which the application will run as groupadd app && \ useradd -M -d /app -s /bin/false -g app app && \ @@ -367,9 +420,13 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va # Install third party software COPY --from=s6-overlay / / COPY --from=ffmpeg /usr/local/bin/ /usr/local/bin/ +#COPY --from=bun /usr/local/bun/ /usr/local/bun/ +#COPY --from=bun /usr/local/bin/ /usr/local/bin/ +#COPY --from=deno /usr/local/bin/ /usr/local/bin/ +#COPY --from=tubesync-uv /usr/local/bin/ /usr/local/bin/ -RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ - --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ +RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt,source=/apt-lib-cache,from=populate-apt-cache-dirs \ + --mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt,source=/apt-cache-cache,from=populate-apt-cache-dirs \ set -x && \ apt-get update && \ # Install file @@ -389,17 +446,35 @@ RUN --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/va # Switch workdir to the the app WORKDIR /app +ARG CI +ARG CACHE_PATH ARG YTDLP_DATE +ARG WORMHOLE_RELAY # Set up the app -RUN --mount=type=tmpfs,target=/cache \ - --mount=type=cache,id=uv-cache,sharing=locked,target=/cache/uv \ - --mount=type=cache,id=pipenv-cache,sharing=locked,target=/cache/pipenv \ - --mount=type=cache,id=apt-lib-cache-${TARGETARCH},sharing=private,target=/var/lib/apt \ - 
--mount=type=cache,id=apt-cache-cache,sharing=private,target=/var/cache/apt \ - --mount=type=bind,source=/uv,target=/usr/local/bin/uv,from=uv-binaries \ +RUN --mount=type=tmpfs,target=${CACHE_PATH} \ + --mount=type=cache,sharing=locked,target=/var/lib/apt,source=/apt-lib-cache,from=populate-apt-cache-dirs \ + --mount=type=cache,sharing=locked,target=/var/cache/apt,source=/apt-cache-cache,from=populate-apt-cache-dirs \ + --mount=type=cache,sharing=locked,target=${CACHE_PATH}/uv,source=/uv-cache,from=populate-uv-cache-dir \ + --mount=type=secret,id=WORMHOLE_CODE,env=WORMHOLE_CODE \ + --mount=type=secret,id=WORMHOLE_RELAY,env=WORMHOLE_RELAY \ + --mount=type=secret,id=WORMHOLE_TRANSIT,env=WORMHOLE_TRANSIT \ + --mount=type=cache,sharing=private,target=${CACHE_PATH}/pip \ + --mount=type=cache,sharing=private,target=${CACHE_PATH}/pipenv \ --mount=type=bind,source=Pipfile,target=/app/Pipfile \ + --mount=type=bind,source=/uv,target=/usr/local/bin/uv,from=uv-binaries \ set -x && \ + # set up cache + { \ + XDG_CACHE_HOME="${CACHE_PATH}" ; export XDG_CACHE_HOME ; \ + saved="${CACHE_PATH}/.saved" ; \ + pycache="${CACHE_PATH}/pycache" ; \ + mkdir -p "${saved}/${TARGETARCH}" ; \ + # keep the real HOME clean + mkdir -p "${CACHE_PATH}/.home-directories" ; \ + cp -at "${CACHE_PATH}/.home-directories/" "${HOME}" && \ + HOME="${CACHE_PATH}/.home-directories/${HOME#/}" ; \ + } && \ apt-get update && \ # Install required build packages apt-get -y --no-install-recommends install \ @@ -407,6 +482,7 @@ RUN --mount=type=tmpfs,target=/cache \ g++ \ gcc \ libjpeg-dev \ + libonig-dev \ libpq-dev \ libwebp-dev \ make \ @@ -415,29 +491,26 @@ RUN --mount=type=tmpfs,target=/cache \ zlib1g-dev \ && \ # Install non-distro packages - mkdir -v -p /cache/.home-directories && \ - cp -at /cache/.home-directories/ "${HOME}" && \ - HOME="/cache/.home-directories/${HOME#/}" \ - XDG_CACHE_HOME='/cache' \ - PIPENV_VERBOSITY=64 \ - PYTHONPYCACHEPREFIX=/cache/pycache \ - uv tool run --no-config --no-progress --no-managed-python -- \ + if [ -n "${WORMHOLE_CODE}" ] ; then \ + PYTHONPYCACHEPREFIX="${pycache}" \ + uv tool run --no-config --no-progress --no-managed-python \ + --from 'magic-wormhole' -- \ + wormhole --version ; \ + fi && \ + PIPENV_VERBOSITY=2 \ + PYTHONPYCACHEPREFIX="${pycache}" \ + uv tool run --no-config --no-progress --no-managed-python -- \ pipenv lock && \ - HOME="/cache/.home-directories/${HOME#/}" \ - XDG_CACHE_HOME='/cache' \ PIPENV_VERBOSITY=1 \ - PYTHONPYCACHEPREFIX=/cache/pycache \ - uv tool run --no-config --no-progress --no-managed-python -- \ - pipenv requirements --from-pipfile --hash >| /cache/requirements.txt && \ + PYTHONPYCACHEPREFIX="${pycache}" \ + uv tool run --no-config --no-progress --no-managed-python -- \ + pipenv requirements --from-pipfile --hash >| "${CACHE_PATH}"/requirements.txt && \ rm -v Pipfile.lock && \ - cat -v /cache/requirements.txt && \ - HOME="/cache/.home-directories/${HOME#/}" \ - UV_LINK_MODE='copy' \ - XDG_CACHE_HOME='/cache' \ - PYTHONPYCACHEPREFIX=/cache/pycache \ + cat -v "${CACHE_PATH}"/requirements.txt && \ + PYTHONPYCACHEPREFIX="${pycache}" \ uv --no-config --no-progress --no-managed-python \ pip install --strict --system --break-system-packages \ - --requirements /cache/requirements.txt && \ + --requirements "${CACHE_PATH}"/requirements.txt && \ # remove the getpot_bgutil_script plugin find /usr/local/lib \ -name 'getpot_bgutil_script.py' \ @@ -450,6 +523,7 @@ RUN --mount=type=tmpfs,target=/cache \ g++ \ gcc \ libjpeg-dev \ + libonig-dev \ libpq-dev \ libwebp-dev \ make \ 
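# Editor's aside, not part of the diff: stripped of the cache plumbing and
# HOME/XDG redirection, the dependency installation above boils down to three
# uv-driven steps (paraphrasing the commands shown in the previous hunk):
#
#   uv tool run --no-config -- pipenv lock
#   uv tool run --no-config -- pipenv requirements --from-pipfile --hash > requirements.txt
#   uv pip install --system --requirements requirements.txt
#
# i.e. the Pipfile is resolved to a hashed requirements file first, and only
# that pinned file is installed into the system site-packages.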
@@ -459,8 +533,57 @@ RUN --mount=type=tmpfs,target=/cache \ && \ apt-get -y autopurge && \ apt-get -y autoclean && \ - rm -v -f /var/cache/debconf/*.dat-old && \ - rm -v -rf /tmp/* + LD_LIBRARY_PATH=/usr/local/lib/python3/dist-packages/pillow.libs:/usr/local/lib/python3/dist-packages/psycopg_binary.libs \ + find /usr/local/lib/python3/dist-packages/ \ + -name '*.so*' -print \ + -exec du -h '{}' ';' \ + -exec ldd '{}' ';' \ + >| "${CACHE_PATH}"/python-shared-objects 2>&1 && \ + # Save our saved directory to the cache directory on the runner + ( set -x ; \ + test -n "${WORMHOLE_CODE}" || exit 0 ; \ + { \ + find /var/cache/apt/ -mindepth 1 -maxdepth 1 -name '*cache.bin' -delete || : ; \ + } && \ + cp -a /var/cache/apt "${saved}/apt-cache-cache" && \ + cp -a /var/lib/apt "${saved}/${TARGETARCH}/apt-lib-cache" && \ + cp -a "${CACHE_PATH}/uv" "${saved}/uv-cache" && \ + XDG_CACHE_HOME="${CACHE_PATH}" \ + PYTHONPYCACHEPREFIX="${pycache}" \ + uv --no-config --no-progress --no-managed-python \ + cache prune --ci --cache-dir "${saved}/uv-cache" && \ + ls -al "${saved}" && ls -al "${saved}"/* && \ + ls -al "${saved}/${TARGETARCH}"/* && \ + if [ -n "${WORMHOLE_RELAY}" ] && [ -n "${WORMHOLE_TRANSIT}" ]; then \ + XDG_CACHE_HOME="${CACHE_PATH}" \ + PYTHONPYCACHEPREFIX="${pycache}" \ + timeout -v -k 10m 25m \ + uv tool run --no-config --no-progress --no-managed-python \ + --from 'magic-wormhole' -- \ + wormhole \ + --appid TubeSync \ + --relay-url "${WORMHOLE_RELAY}" \ + --transit-helper "${WORMHOLE_TRANSIT}" \ + send \ + --hide-progress --no-qr \ + --code "${WORMHOLE_CODE}" \ + "${saved}" || : ; \ + else \ + XDG_CACHE_HOME="${CACHE_PATH}" \ + PYTHONPYCACHEPREFIX="${pycache}" \ + timeout -v -k 10m 25m \ + uv tool run --no-config --no-progress --no-managed-python \ + --from 'magic-wormhole' -- \ + wormhole send \ + --hide-progress --no-qr \ + --code "${WORMHOLE_CODE}" \ + "${saved}" || : ; \ + fi ; \ + ) && \ + rm -v -f Pipfile.lock /var/cache/debconf/*.dat-old && \ + rm -v -rf /tmp/* ; \ + grep >/dev/null -Fe ' => not found' "${CACHE_PATH}"/python-shared-objects && \ + cat -v "${CACHE_PATH}"/python-shared-objects && exit 1 || : # Copy root COPY config/root / @@ -480,17 +603,21 @@ COPY tubesync/tubesync/local_settings.py.container /app/tubesync/local_settings. # Build app RUN set -x && \ # Make absolutely sure we didn't accidentally bundle a SQLite dev database - rm -rf /app/db.sqlite3 && \ + test '!' 
-e /app/db.sqlite3 && \ # Run any required app commands /usr/bin/python3 -B /app/manage.py compilescss && \ /usr/bin/python3 -B /app/manage.py collectstatic --no-input --link && \ + rm -rf /config /downloads /run/app && \ # Create config, downloads and run dirs mkdir -v -p /run/app && \ - mkdir -v -p /config/media && \ + mkdir -v -p /config/media /config/tasks && \ mkdir -v -p /config/cache/pycache && \ mkdir -v -p /downloads/audio && \ mkdir -v -p /downloads/video && \ # Check nginx configuration copied from config/root/etc + mkdir -v -p /config/log && \ + cp -a /var/log/nginx /config/log/ && \ + cp -v -p /config/log/nginx/access.log /config/log/nginx/access.log.gz && \ openresty -c /etc/nginx/nginx.conf -e stderr -t && \ # Append software versions ffmpeg_version=$(/usr/local/bin/ffmpeg -version | awk -v 'ev=31' '1 == NR && "ffmpeg" == $1 { print $3; ev=0; } END { exit ev; }') && \ diff --git a/Pipfile b/Pipfile index 49b5127be..b809abe97 100644 --- a/Pipfile +++ b/Pipfile @@ -8,7 +8,9 @@ autopep8 = "*" [packages] django = "~=5.2.1" +django-huey = "*" django-sass-processor = {extras = ["management-command"], version = "*"} +libsass = "*" pillow = "*" whitenoise = "*" gunicorn = "*" @@ -25,3 +27,4 @@ emoji = "*" brotli = "*" html5lib = "*" bgutil-ytdlp-pot-provider = "*" +babi = "*" diff --git a/README.md b/README.md index c5adc5c51..e49cacfa6 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ services: tubesync: image: ghcr.io/meeb/tubesync:latest container_name: tubesync - restart: unless-stopped + restart: on-failure:3 ports: - 4848:4848 volumes: diff --git a/config/root/etc/nginx/nginx.conf b/config/root/etc/nginx/nginx.conf index 4ca5bac4b..ef6f4c82c 100644 --- a/config/root/etc/nginx/nginx.conf +++ b/config/root/etc/nginx/nginx.conf @@ -11,6 +11,10 @@ pid /run/nginx.pid; env YT_POT_BGUTIL_BASE_URL; +# load_module snippets for installed modules +include /etc/nginx/modules-enabled/*.conf; +# a very silly syntax rendering bug needs this */ + events { worker_connections 1024; } @@ -43,8 +47,8 @@ http { # Logging log_format host '$remote_addr - $remote_user [$time_local] "[$host] $request" $status $bytes_sent "$http_referer" "$http_user_agent" "$gzip_ratio"'; - access_log /dev/stdout; - error_log stderr; + access_log /config/log/nginx/access.log.gz combined gzip=9 flush=1m; + error_log /config/log/nginx/error.log info; # GZIP gzip on; diff --git a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-db-worker b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-db-worker new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-db-worker @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-fs-worker b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-fs-worker new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-fs-worker @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-network-worker b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-network-worker new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/contents.d/tubesync-network-worker @@ -0,0 +1 @@ + diff --git 
a/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type new file mode 100644 index 000000000..757b42211 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/background-task-workers/type @@ -0,0 +1 @@ +bundle diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-database @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-filesystem @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-net-limited b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-net-limited new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-net-limited @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/contents.d/huey-network @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type new file mode 100644 index 000000000..757b42211 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-consumers/type @@ -0,0 +1 @@ +bundle diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/base @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/dependencies.d/gunicorn @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal new file mode 100644 index 000000000..d751378e1 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/down-signal @@ -0,0 +1 @@ +SIGINT diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/run b/config/root/etc/s6-overlay/s6-rc.d/huey-database/run new file mode 100644 index 000000000..c803c4c3a --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue database diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-database/type b/config/root/etc/s6-overlay/s6-rc.d/huey-database/type new file mode 100644 index 000000000..5883cff0c --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-database/type @@ -0,0 +1 @@ +longrun diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base 
b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/base @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/dependencies.d/gunicorn @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal new file mode 100644 index 000000000..d751378e1 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/down-signal @@ -0,0 +1 @@ +SIGINT diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run new file mode 100644 index 000000000..f52ee7c64 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue filesystem diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type new file mode 100644 index 000000000..5883cff0c --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-filesystem/type @@ -0,0 +1 @@ +longrun diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/base @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/dependencies.d/gunicorn @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal new file mode 100644 index 000000000..d751378e1 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/down-signal @@ -0,0 +1 @@ +SIGINT diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run new file mode 100644 index 000000000..c40cd7860 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue limited diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type new file mode 100644 index 000000000..5883cff0c --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-net-limited/type @@ -0,0 +1 @@ +longrun diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/base @@ -0,0 +1 @@ + diff --git 
a/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/dependencies.d/gunicorn @@ -0,0 +1 @@ + diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal b/config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal new file mode 100644 index 000000000..d751378e1 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/down-signal @@ -0,0 +1 @@ +SIGINT diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/run b/config/root/etc/s6-overlay/s6-rc.d/huey-network/run new file mode 100644 index 000000000..0e5f0e8f1 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/run @@ -0,0 +1,5 @@ +#!/command/with-contenv bash + +exec nice -n "${TUBESYNC_NICE:-1}" s6-setuidgid app \ + /usr/bin/python3 /app/manage.py djangohuey \ + --queue network diff --git a/config/root/etc/s6-overlay/s6-rc.d/huey-network/type b/config/root/etc/s6-overlay/s6-rc.d/huey-network/type new file mode 100644 index 000000000..5883cff0c --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/huey-network/type @@ -0,0 +1 @@ +longrun diff --git a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run index baaf6e0cf..fafd8b8a6 100755 --- a/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run +++ b/config/root/etc/s6-overlay/s6-rc.d/tubesync-init/run @@ -4,6 +4,21 @@ groupmod -o -g "${PGID:=911}" app usermod -o -u "${PUID:=911}" app +# Ensure /config directories exist +mkdir -v -p /config/{cache,media,tasks,tubesync} + +# Copy local_settings.py for the user +if [ -f /config/tubesync/local_settings.py ] +then + # from the image for an example or comparison + cp -v -p /app/tubesync/local_settings.py \ + /config/tubesync/local_settings.py.image + + # to the image to apply the user's settings + cp -v -p /config/tubesync/local_settings.py \ + /app/tubesync/local_settings.py +fi + # Reset permissions chown -R app:app /run/app chmod -R 0700 /run/app @@ -25,6 +40,21 @@ then chmod -R 0755 /downloads fi +# Prepare for nginx logging into /config/log/nginx +mkdir -p /config/log +rm -rf /config/log/nginx.9 +for n in $(seq 8 -1 0) +do + test '!' -d "/config/log/nginx.${n}" || + mv "/config/log/nginx.${n}" "/config/log/nginx.$((1 + n))" +done ; unset -v n ; +rm -rf /config/log/nginx.0 +test '!' -d /config/log/nginx || +mv /config/log/nginx /config/log/nginx.0 +rm -rf /config/log/nginx +cp -a /var/log/nginx /config/log/ +cp -p /config/log/nginx/access.log /config/log/nginx/access.log.gz + if [ 'True' = "${TUBESYNC_DEBUG:-False}" ] then s6-setuidgid app \ diff --git a/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers b/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/config/root/etc/s6-overlay/s6-rc.d/user/contents.d/huey-consumers @@ -0,0 +1 @@ + diff --git a/tubesync/common/errors.py b/tubesync/common/errors.py index 9ff44a48a..17ebec407 100644 --- a/tubesync/common/errors.py +++ b/tubesync/common/errors.py @@ -42,3 +42,8 @@ class DatabaseConnectionError(Exception): Raised when parsing or initially connecting to a database. ''' pass + + +class BgTaskWorkerError(Exception): + # Raised when the worker process is not in a normal working state. 
+ pass diff --git a/tubesync/common/huey.py b/tubesync/common/huey.py new file mode 100644 index 000000000..66d13fc22 --- /dev/null +++ b/tubesync/common/huey.py @@ -0,0 +1,37 @@ + + +def sqlite_tasks(key, /, prefix=None): + name_fmt = 'huey_{}' + if prefix is None: + prefix = '' + if prefix: + name_fmt = f'huey_{prefix}_' + '{}' + name = name_fmt.format(key) + return dict( + huey_class='huey.SqliteHuey', + name=name, + immediate=False, + results=True, + store_none=False, + utc=True, + compression=True, + connection=dict( + filename=f'/config/tasks/{name}.db', + fsync=True, + strict_fifo=True, + ), + consumer=dict( + workers=1, + worker_type='process', + max_delay=20.0, + flush_locks=True, + scheduler_interval=10, + simple_log=False, + # verbose has three positions: + # DEBUG: True + # INFO: None + # WARNING: False + verbose=False, + ), + ) + diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 8f7afc2c7..0c7507e96 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -7,9 +7,19 @@ import string import time from django.core.paginator import Paginator +from functools import partial +from operator import attrgetter, itemgetter +from pathlib import Path from urllib.parse import urlunsplit, urlencode, urlparse from .errors import DatabaseConnectionError +def directory_and_stem(arg_path, /, all_suffixes=False): + filepath = Path(arg_path) + stem = Path(filepath.stem) + while all_suffixes and stem.suffixes and '' != stem.suffix: + stem = Path(stem.stem) + return (filepath.parent, str(stem),) + def getenv(key, default=None, /, *, integer=False, string=True): ''' @@ -46,6 +56,51 @@ def getenv(key, default=None, /, *, integer=False, string=True): return r +def glob_quote(filestr, /): + _glob_specials = { + '?': '[?]', + '*': '[*]', + '[': '[[]', + ']': '[]]', # probably not needed, but it won't hurt + } + + if not isinstance(filestr, str): + raise TypeError(f'expected a str, got "{type(filestr)}"') + + return filestr.translate(str.maketrans(_glob_specials)) + + +def list_of_dictionaries(arg_list, /, arg_function=lambda x: x): + assert callable(arg_function) + if isinstance(arg_list, list): + _map_func = partial(lambda f, d: f(d) if isinstance(d, dict) else d, arg_function) + return (True, list(map(_map_func, arg_list)),) + return (False, arg_list,) + + +def mkdir_p(arg_path, /, *, mode=0o777): + ''' + Reminder: mode only affects the last directory + ''' + dirpath = Path(arg_path) + return dirpath.mkdir(mode=mode, parents=True, exist_ok=True) + + +def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None): + result = list(iterable) + if key_func is None: + # itemgetter is the default + if item or not (item or attr): + key_func = itemgetter + elif attr: + key_func = attrgetter + for key, reverse in reversed(specs): + result.sort(key=key_func(key), reverse=reverse) + if use_reversed: + return list(reversed(result)) + return result + + def parse_database_connection_string(database_connection_string): ''' Parses a connection string in a URL style format, such as: @@ -167,6 +222,15 @@ def clean_emoji(s): return emoji.replace_emoji(s) +def seconds_to_timestr(seconds): + seconds = seconds % (24 * 3600) + hour = seconds // 3600 + seconds %= 3600 + minutes = seconds // 60 + seconds %= 60 + return '{:02d}:{:02d}:{:02d}'.format(hour, minutes, seconds) + + def time_func(func): def wrapper(*args, **kwargs): start = time.perf_counter() diff --git a/tubesync/restart_services.sh b/tubesync/restart_services.sh index bb34d8091..3c1d981bb 
100755 --- a/tubesync/restart_services.sh +++ b/tubesync/restart_services.sh @@ -1,26 +1,51 @@ #!/usr/bin/env sh -dir='/run/service' +_dir='/run/service' svc_path() ( - cd "${dir}" + cd "${_dir}" && realpath -e -s "$@" ) +_bundles="$( + find '/etc/s6-overlay/s6-rc.d' -mindepth 2 -maxdepth 2 \ + -name 'type' \ + -execdir grep -F -q -e bundle '{}' ';' \ + -printf '%P\n' | \ + sed -e 's,/type$,,' ; +)" +is_a_bundle() { + local bundle + for bundle in ${_bundles} + do + if [ "$1" = "${bundle}" ] + then + return 0 + fi + done + return 1 +} + if [ 0 -eq $# ] then - set -- \ - $( cd "${dir}" && svc_path tubesync*-worker ) \ - "$( svc_path gunicorn )" \ - "$( svc_path nginx )" + set -- $(/command/s6-rc list user | grep -v -e '-init$') fi -for service in $( svc_path "$@" ) +for arg in "$@" do - printf -- 'Restarting %-28s' "${service#${dir}/}..." - _began="$( date '+%s' )" - /command/s6-svc -wr -r "${service}" - _ended="$( date '+%s' )" - printf -- '\tcompleted (in %2.1d seconds).\n' \ - "$( expr "${_ended}" - "${_began}" )" + _svcs="${arg}" + if is_a_bundle "${arg}" + then + _svcs="$(/command/s6-rc list "${arg}" | grep -v -e '-init$')" + fi + for service in $(svc_path ${_svcs}) + do + printf -- 'Restarting %-28s' "${service#${_dir}/}..." + _began="$( date '+%s' )" + /command/s6-svc -wr -r "${service}" + _ended="$( date '+%s' )" + printf -- '\tcompleted (in %2.1d seconds).\n' \ + "$( expr "${_ended}" - "${_began}" )" + done done -unset -v _began _ended service +unset -v _began _ended _svcs arg service +unset -v _bundles _dir diff --git a/tubesync/sync/matching.py b/tubesync/sync/matching.py index 4196a9f83..f5fe3fd1d 100644 --- a/tubesync/sync/matching.py +++ b/tubesync/sync/matching.py @@ -6,7 +6,7 @@ from .choices import Val, Fallback -from .utils import multi_key_sort +from common.utils import multi_key_sort from django.conf import settings diff --git a/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py new file mode 100644 index 000000000..388de67a7 --- /dev/null +++ b/tubesync/sync/migrations/0035_alter_metadata_unique_together_metadata_source_and_more.py @@ -0,0 +1,58 @@ +# Generated by Django 5.2.1 on 2025-05-28 09:57 + +import django.db.models.deletion +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('sync', '0034_source_target_schedule_and_more'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='metadata', + unique_together={('media', 'site', 'key')}, + ), + migrations.AddField( + model_name='metadata', + name='source', + field=models.ForeignKey( + blank=True, + help_text='Source from which the video was retrieved', + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name='videos', + related_query_name='video', + to='sync.source', + ), + ), + migrations.AlterField( + model_name='metadata', + name='media', + field=models.OneToOneField( + blank=True, + help_text='Media the metadata belongs to', + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='new_metadata', + to='sync.media', + ), + ), + migrations.AlterField( + model_name='metadata', + name='retrieved', + field=models.DateTimeField( + db_index=True, + default=django.utils.timezone.now, + help_text='Date and time the metadata was retrieved', + verbose_name='retrieved', + ), + ), + migrations.AlterUniqueTogether( + name='metadata', + unique_together={('media', 'site', 
'key'), ('source', 'site', 'key')}, + ), + ] + diff --git a/tubesync/sync/models/_private.py b/tubesync/sync/models/_private.py index 8cf41ce1d..5ec14d7c6 100644 --- a/tubesync/sync/models/_private.py +++ b/tubesync/sync/models/_private.py @@ -1,4 +1,3 @@ -from pathlib import Path from ..choices import Val, YouTube_SourceType # noqa @@ -11,11 +10,3 @@ def _nfo_element(nfo, label, text, /, *, attrs={}, tail='\n', char=' ', indent=2 element.tail = tail + (char * indent) return element -def directory_and_stem(arg_path, /, all_suffixes=False): - filepath = Path(arg_path) - stem = Path(filepath.stem) - while all_suffixes and stem.suffixes and '' != stem.suffix: - stem = Path(stem.stem) - stem = str(stem) - return (filepath.parent, stem,) - diff --git a/tubesync/sync/models/media.py b/tubesync/sync/models/media.py index 62f73d5d7..42d2d8919 100644 --- a/tubesync/sync/models/media.py +++ b/tubesync/sync/models/media.py @@ -17,15 +17,15 @@ from common.errors import NoFormatException from common.json import JSONEncoder from common.utils import ( - clean_filename, clean_emoji, + clean_filename, clean_emoji, directory_and_stem, + glob_quote, mkdir_p, multi_key_sort, seconds_to_timestr, ) from ..youtube import ( get_media_info as get_youtube_media_info, download_media as download_youtube_media, ) from ..utils import ( - seconds_to_timestr, parse_media_format, filter_response, - write_text_file, mkdir_p, glob_quote, multi_key_sort, + filter_response, parse_media_format, write_text_file, ) from ..matching import ( get_best_combined_format, @@ -38,9 +38,10 @@ from ._migrations import ( media_file_storage, get_media_thumb_path, get_media_file_path, ) -from ._private import _srctype_dict, _nfo_element, directory_and_stem +from ._private import _srctype_dict, _nfo_element from .media__tasks import ( - download_checklist, download_finished, wait_for_premiere, + copy_thumbnail, download_checklist, download_finished, + wait_for_premiere, write_nfo_file, ) from .source import Source @@ -566,7 +567,11 @@ def format_dict(self): @property def has_metadata(self): - return self.metadata is not None + result = self.metadata is not None + if not result: + return False + value = self.get_metadata_first_value(('id', 'display_id', 'channel_id', 'uploader_id',)) + return value is not None def metadata_clear(self, /, *, save=False): @@ -600,8 +605,10 @@ def ingest_metadata(self, data): arg_dict=data, ) md_model = self._meta.fields_map.get('new_metadata').related_model - md, created = md_model.objects.get_or_create( - media_id=self.pk, + md, created = md_model.objects.filter( + source__isnull=True, + ).get_or_create( + media=self, site=site, key=self.key, ) @@ -697,8 +704,7 @@ def refresh_formats(self): data = self.loaded_metadata metadata_seconds = data.get('epoch', None) if not metadata_seconds: - self.metadata = None - self.save(update_fields={'metadata'}) + self.metadata_clear(save=True) return False now = timezone.now() @@ -1222,7 +1228,9 @@ def rename_files(self): # add imported functions +Media.copy_thumbnail = copy_thumbnail Media.download_checklist = download_checklist Media.download_finished = download_finished Media.wait_for_premiere = wait_for_premiere +Media.write_nfo_file = write_nfo_file diff --git a/tubesync/sync/models/media__tasks.py b/tubesync/sync/models/media__tasks.py index e8b33235b..06aa46b25 100644 --- a/tubesync/sync/models/media__tasks.py +++ b/tubesync/sync/models/media__tasks.py @@ -1,5 +1,6 @@ import os from pathlib import Path +from shutil import copyfile from common.logger import log from 
common.errors import ( NoMetadataException, @@ -7,6 +8,7 @@ from django.utils import timezone from django.utils.translation import gettext_lazy as _ from ..choices import Val, SourceResolution +from ..utils import write_text_file def download_checklist(self, skip_checks=False): @@ -97,6 +99,43 @@ def download_finished(self, format_str, container, downloaded_filepath=None): self.downloaded_format = Val(SourceResolution.AUDIO) +def copy_thumbnail(self): + if not self.source.copy_thumbnails: + return + if not self.thumb_file_exists: + from sync.tasks import delete_task_by_media, download_media_thumbnail + args = ( str(self.pk), self.thumbnail, ) + if not args[1]: + return + delete_task_by_media('sync.tasks.download_media_thumbnail', args) + if download_media_thumbnail.now(*args): + self.refresh_from_db() + if not self.thumb_file_exists: + return + log.info( + 'Copying media thumbnail' + f' from: {self.thumb.path}' + f' to: {self.thumbpath}' + ) + # copyfile returns the destination, so we may as well pass that along + return copyfile(self.thumb.path, self.thumbpath) + + +def write_nfo_file(self): + if not self.source.write_nfo: + return + log.info(f'Writing media NFO file to: {self.nfopath}') + try: + # write_text_file returns bytes written + return write_text_file(self.nfopath, self.nfoxml) + except PermissionError as e: + msg = ( + 'A permissions problem occured when writing' + ' the new media NFO file: {}' + ) + log.exception(msg, e) + + def wait_for_premiere(self): hours = lambda td: 1+int((24*td.days)+(td.seconds/(60*60))) diff --git a/tubesync/sync/models/metadata.py b/tubesync/sync/models/metadata.py index 6f0f51ed6..637f2d9a8 100644 --- a/tubesync/sync/models/metadata.py +++ b/tubesync/sync/models/metadata.py @@ -3,8 +3,9 @@ from common.timestamp import timestamp_to_datetime from common.utils import django_queryset_generator as qs_gen from django import db +from django.utils import timezone from django.utils.translation import gettext_lazy as _ -from .media import Media +from .media import Media, Source class Metadata(db.models.Model): @@ -17,6 +18,7 @@ class Meta: verbose_name_plural = _('Metadata about Media') unique_together = ( ('media', 'site', 'key'), + ('source', 'site', 'key', ), ) get_latest_by = ["-retrieved", "-created"] @@ -27,12 +29,22 @@ class Meta: default=uuid.uuid4, help_text=_('UUID of the metadata'), ) + source = db.models.ForeignKey( + Source, + on_delete=db.models.CASCADE, + related_name="videos", + related_query_name="video", + help_text=_('Source from which the video was retrieved'), + blank=True, + null=True, + ) media = db.models.OneToOneField( Media, # on_delete=models.DO_NOTHING, on_delete=db.models.SET_NULL, related_name='new_metadata', help_text=_('Media the metadata belongs to'), + blank=True, null=True, parent_link=False, ) @@ -62,8 +74,8 @@ class Meta: ) retrieved = db.models.DateTimeField( _('retrieved'), - auto_now_add=True, db_index=True, + default=timezone.now, help_text=_('Date and time the metadata was retrieved'), ) uploaded = db.models.DateTimeField( diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index 06d652a08..ca74e6822 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -404,7 +404,7 @@ def advance_day(arg_dt, target_weekday, /): ) elif Val(IndexSchedule.EVERY_7_DAYS) > self.index_schedule: self.target_schedule = advance_hour( - when.replace(hour=1+when.hour), + when + timezone.timedelta(hours=1), self.target_schedule.hour, ) diff --git a/tubesync/sync/signals.py 
b/tubesync/sync/signals.py index d68a082f0..783298049 100644 --- a/tubesync/sync/signals.py +++ b/tubesync/sync/signals.py @@ -4,12 +4,13 @@ from django.conf import settings from django.db import IntegrityError from django.db.models.signals import pre_save, post_save, pre_delete, post_delete -from django.db.transaction import on_commit +from django.db.transaction import atomic, on_commit from django.dispatch import receiver from django.utils.translation import gettext_lazy as _ from background_task.signals import task_failed from background_task.models import Task from common.logger import log +from common.utils import glob_quote, mkdir_p from .models import Source, Media, Metadata from .tasks import (delete_task_by_source, delete_task_by_media, index_source_task, download_media_thumbnail, download_media_metadata, @@ -17,7 +18,7 @@ download_media, download_source_images, delete_all_media_for_source, save_all_media_for_source, rename_media, get_media_metadata_task, get_media_download_task) -from .utils import delete_file, glob_quote, mkdir_p +from .utils import delete_file from .filtering import filter_media from .choices import Val, YouTube_SourceType @@ -429,17 +430,40 @@ def media_post_delete(sender, instance, **kwargs): # Re-use the old metadata if it exists instance_qs = Metadata.objects.filter( media__isnull=True, + source__isnull=True, site=old_metadata.get(site_field) or 'Youtube', key=skipped_media.key, ) try: - instance_qs.update(media=skipped_media) + if instance_qs.count(): + with atomic(durable=False): + # clear the link to a media instance + Metadata.objects.filter(media=skipped_media).update(media=None) + # choose the oldest metadata for our key + md = instance_qs.filter( + key=skipped_media.key, + ).order_by( + 'key', + 'created', + ).first() + # set the link to a media instance only on our selected metadata + log.info(f'Reusing old metadata for "{skipped_media.key}": {skipped_media.name}') + instance_qs.filter(uuid=md.uuid).update(media=skipped_media) + # delete any metadata that we are no longer using + instance_qs.exclude(uuid=md.uuid).delete() + except IntegrityError: - # Delete the new metadata - Metadata.objects.filter(media=skipped_media).delete() + # this probably won't happen, but try it without a transaction try: - instance_qs.update(media=skipped_media) - except IntegrityError: - # Delete the old metadata if it still failed + # clear the link to a media instance + Metadata.objects.filter(media=skipped_media).update(media=None) + # keep one metadata + md = instance_qs.order_by('created').first() + instance_qs.filter(uuid=md.uuid).update(media=skipped_media) + except IntegrityError as e: + log.exception(f'media_post_delete: could not update selected metadata: {e}') + finally: + log.debug(f'Deleting metadata for "{skipped_media.key}": {skipped_media.pk}') + # delete the old metadata instance_qs.delete() diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index c5903bb06..c9f60d23a 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -10,6 +10,7 @@ import requests import time import uuid +from collections import deque as queue from io import BytesIO from hashlib import sha1 from pathlib import Path @@ -19,7 +20,7 @@ from django.conf import settings from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from django.db import DatabaseError, IntegrityError +from django.db import DatabaseError from django.db.transaction import atomic from django.utils import timezone from 
django.utils.translation import gettext_lazy as _ @@ -27,15 +28,15 @@ from background_task.exceptions import InvalidTaskError from background_task.models import Task, CompletedTask from common.logger import log -from common.errors import ( NoFormatException, NoMediaException, - NoThumbnailException, - DownloadFailedException, ) +from common.errors import ( BgTaskWorkerError, DownloadFailedException, + NoFormatException, NoMediaException, + NoThumbnailException, ) from common.utils import ( django_queryset_generator as qs_gen, - remove_enclosed, ) + remove_enclosed, seconds_to_timestr, ) from .choices import Val, TaskQueue -from .models import Source, Media, MediaServer -from .utils import ( get_remote_image, resize_image_to_height, - write_text_file, filter_response, seconds_to_timestr, ) +from .models import Source, Media, MediaServer, Metadata +from .utils import (get_remote_image, resize_image_to_height, + filter_response, ) from .youtube import YouTubeError db_vendor = db.connection.vendor @@ -56,6 +57,7 @@ def map_task_to_instance(task): because UUID's are incompatible with background_task's "creator" feature. ''' TASK_MAP = { + 'sync.tasks.migrate_to_metadata': Media, 'sync.tasks.index_source_task': Source, 'sync.tasks.check_source_directory_exists': Source, 'sync.tasks.download_media_thumbnail': Media, @@ -302,6 +304,70 @@ def cleanup_removed_media(source, video_keys): schedule_media_servers_update() +def save_db_batch(qs, objs, fields, /): + assert hasattr(qs, 'bulk_update') + assert callable(qs.bulk_update) + assert hasattr(objs, '__len__') + assert callable(objs.__len__) + assert isinstance(fields, (tuple, list, set, frozenset)) + + num_updated = 0 + num_objs = len(objs) + with atomic(durable=False): + num_updated = qs.bulk_update(objs=objs, fields=fields) + if num_objs == num_updated: + # this covers at least: list, set, deque + if hasattr(objs, 'clear') and callable(objs.clear): + objs.clear() + return num_updated + + +@background(schedule=dict(priority=20, run_at=60), queue=Val(TaskQueue.DB), remove_existing_tasks=True) +def migrate_to_metadata(media_id): + try: + media = Media.objects.get(pk=media_id) + except Media.DoesNotExist as e: + # Task triggered but the media no longer exists, do nothing + log.error(f'Task migrate_to_metadata(pk={media_id}) called but no ' + f'media exists with ID: {media_id}') + raise InvalidTaskError(_('no such media')) from e + + try: + data = Metadata.objects.get( + media__isnull=True, + source=media.source, + key=media.key, + ) + except Metadata.DoesNotExist as e: + raise InvalidTaskError(_('no indexed data to migrate to metadata')) from e + + video = data.value + fields = lambda f, m: m.get_metadata_field(f) + timestamp = video.get(fields('timestamp', media), None) + for key in ('epoch', 'availability', 'extractor_key',): + field = fields(key, media) + value = video.get(field) + existing_value = media.get_metadata_first_value(key) + if value is None: + if 'epoch' == key: + value = timestamp + elif 'extractor_key' == key: + value = data.site + if value is not None: + if existing_value and ('epoch' == key or value == existing_value): + continue + media.save_to_metadata(field, value) + + +@background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.NET), remove_existing_tasks=False) +def wait_for_database_queue(): + worker_down_path = Path('/run/service/tubesync-db-worker/down') + while Task.objects.unlocked(timezone.now()).filter(queue=Val(TaskQueue.DB)).count() > 0: + time.sleep(5) + if worker_down_path.exists() and 
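save_db_batch() above only clears the container when every object was written, which pairs with the bounded deques the indexing task sets up further down. A short usage sketch (the source of Media rows is a placeholder):

```python
from collections import deque

batch = deque(maxlen=10)
fields = frozenset(('duration', 'published', 'title'))

for media in media_iterable:   # placeholder for whatever yields Media rows
    media.title = (media.title or '')[:200]
    batch.append(media)
    if len(batch) == batch.maxlen:
        # one bulk_update() per full batch instead of one UPDATE per object
        save_db_batch(Media.objects, batch, fields)

# flush any partial batch left at the end
save_db_batch(Media.objects, batch, fields)
```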
worker_down_path.is_file(): + raise BgTaskWorkerError(_('queue worker stopped')) + + @background(schedule=dict(priority=20, run_at=30), queue=Val(TaskQueue.NET), remove_existing_tasks=True) def index_source_task(source_id): ''' @@ -322,13 +388,12 @@ def index_source_task(source_id): # update the target schedule column source.task_run_at_dt # Reset any errors - # TODO: determine if this affects anything source.has_failed = False - save_model(source) # Index the source videos = source.index_media() if not videos: - # TODO: Record this error in source.has_failed ? + source.has_failed = True + save_model(source) raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no ' f'media to index, is the source key valid? Check the ' f'source configuration is correct and that the source ' @@ -336,8 +401,31 @@ def index_source_task(source_id): # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() save_model(source) + wait_for_database_queue( + priority=19, # the indexing task uses 20 + verbose_name=_('Waiting for database tasks to complete'), + ) + wait_for_database_queue( + priority=29, # the checking task uses 30 + queue=Val(TaskQueue.FS), + verbose_name=_('Delaying checking all media for database tasks'), + ) + delete_task_by_source('sync.tasks.save_all_media_for_source', source.pk) num_videos = len(videos) log.info(f'Found {num_videos} media items for source: {source}') + tvn_format = '{:,}' + f'/{num_videos:,}' + db_batch_data = queue(list(), maxlen=50) + db_fields_data = frozenset(( + 'retrieved', + 'site', + 'value', + )) + db_batch_media = queue(list(), maxlen=10) + db_fields_media = frozenset(( + 'duration', + 'published', + 'title', + )) fields = lambda f, m: m.get_metadata_field(f) task = get_source_index_task(source_id) if task: @@ -346,7 +434,6 @@ def index_source_task(source_id): valid='0123456789/,', end=task.verbose_name.find('Index'), ) - tvn_format = '{:,}' + f'/{num_videos:,}' vn = 0 video_keys = set() while len(videos) > 0: @@ -358,15 +445,17 @@ def index_source_task(source_id): # Video has no unique key (ID), it can't be indexed continue video_keys.add(key) + if len(db_batch_data) == db_batch_data.maxlen: + save_db_batch(Metadata.objects, db_batch_data, db_fields_data) + if len(db_batch_media) == db_batch_media.maxlen: + save_db_batch(Media.objects, db_batch_media, db_fields_media) update_task_status(task, tvn_format.format(vn)) - # media, new_media = Media.objects.get_or_create(key=key, source=source) - try: - media = Media.objects.get(key=key, source=source) - except Media.DoesNotExist: - media = Media(key=key) - media.source = source - media.duration = float(video.get(fields('duration', media), None) or 0) or None - media.title = str(video.get(fields('title', media), ''))[:200] + media_defaults = dict() + # create a dummy instance to use its functions + media = Media(source=source, key=key) + media_defaults['duration'] = float(video.get(fields('duration', media), None) or 0) or None + media_defaults['title'] = str(video.get(fields('title', media), ''))[:200] + site = video.get(fields('ie_key', media), None) timestamp = video.get(fields('timestamp', media), None) try: published_dt = media.ts_to_dt(timestamp) @@ -374,46 +463,75 @@ def index_source_task(source_id): pass else: if published_dt: - media.published = published_dt - try: - media.save() - except IntegrityError as e: - log.error(f'Index media failed: {source} / {media} with "{e}"') - else: + media_defaults['published'] = published_dt + # Retrieve or create the actual media 
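The loop body that continues below swaps the old try/except Media.DoesNotExist block for get_or_create(defaults=...). A simplified sketch of that pattern (plain field keys and the default manager are assumptions; the diff resolves keys through get_metadata_field() and the source's related manager):

```python
media_defaults = dict(
    duration=float(video.get('duration') or 0) or None,
    title=str(video.get('title', ''))[:200],
)
media, new_media = Media.objects.only(
    'uuid', 'source', 'key', 'duration', 'published', 'title',
).get_or_create(defaults=media_defaults, source=source, key=key)
if not new_media:
    # defaults are ignored for existing rows, so copy them over by hand
    for field, value in media_defaults.items():
        setattr(media, field, value)
```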
instance + media, new_media = source.media_source.only( + 'uuid', + 'source', + 'key', + *db_fields_media, + ).get_or_create(defaults=media_defaults, source=source, key=key) + db_batch_media.append(media) + data, new_data = source.videos.defer('value').filter( + media__isnull=True, + ).get_or_create(source=source, key=key) + if site: + data.site = site + data.retrieved = source.last_crawl + data.value = video + db_batch_data.append(data) + vn_fmt = _('Updating metadata from indexing results for: "{}": {}') + migrate_to_metadata( + str(media.pk), + verbose_name=vn_fmt.format(media.key, media.name), + ) + if not new_media: + # update the existing media + for key, value in media_defaults.items(): + setattr(media, key, value) log.debug(f'Indexed media: {vn}: {source} / {media}') + else: # log the new media instances - new_media_instance = ( - # new_media or - media.created and - source.last_crawl and - media.created >= source.last_crawl - ) - if new_media_instance: - log.info(f'Indexed new media: {source} / {media}') - log.info(f'Scheduling tasks to download thumbnail for: {media.key}') - thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg' - vn_fmt = _('Downloading {} thumbnail for: "{}": {}') - for prefix in ('hq', 'sd', 'maxres',): - thumbnail_url = thumbnail_fmt.format( - media.key, - prefix, - ) - download_media_thumbnail( - str(media.pk), - thumbnail_url, - verbose_name=vn_fmt.format(prefix, media.key, media.name), - ) - log.info(f'Scheduling task to download metadata for: {media.url}') - verbose_name = _('Downloading metadata for: "{}": {}') - download_media_metadata( + log.info(f'Indexed new media: {source} / {media}') + log.info(f'Scheduling tasks to download thumbnail for: {media.key}') + thumbnail_fmt = 'https://i.ytimg.com/vi/{}/{}default.jpg' + vn_fmt = _('Downloading {} thumbnail for: "{}": {}') + for num, prefix in enumerate(('hq', 'sd', 'maxres',)): + thumbnail_url = thumbnail_fmt.format( + media.key, + prefix, + ) + download_media_thumbnail( str(media.pk), - verbose_name=verbose_name.format(media.key, media.name), + thumbnail_url, + schedule=dict(run_at=10+(300*num)), + verbose_name=vn_fmt.format(prefix, media.key, media.name), ) + log.info(f'Scheduling task to download metadata for: {media.url}') + verbose_name = _('Downloading metadata for: "{}": {}') + download_media_metadata( + str(media.pk), + schedule=dict(priority=35), + verbose_name=verbose_name.format(media.key, media.name), + ) # Reset task.verbose_name to the saved value update_task_status(task, None) + # Update any remaining items in the batches + save_db_batch(Metadata.objects, db_batch_data, db_fields_data) + save_db_batch(Media.objects, db_batch_media, db_fields_media) # Cleanup of media no longer available from the source cleanup_removed_media(source, video_keys) + # Clear references to indexed data videos = video = None + db_batch_data.clear() + db_batch_media.clear() + # Trigger any signals that we skipped with batched updates + vn_fmt = _('Checking all media for "{}"') + save_all_media_for_source( + str(source.pk), + schedule=dict(run_at=60), + verbose_name=vn_fmt.format(source.name), + ) @background(schedule=dict(priority=0, run_at=0), queue=Val(TaskQueue.FS)) @@ -691,27 +809,8 @@ def download_media(media_id, override=False): # Media has been downloaded successfully media.download_finished(format_str, container, filepath) save_model(media) - # If selected, copy the thumbnail over as well - if media.source.copy_thumbnails: - if not media.thumb_file_exists: - thumbnail_url = media.thumbnail - if 
thumbnail_url: - args = ( str(media.pk), thumbnail_url, ) - delete_task_by_media('sync.tasks.download_media_thumbnail', args) - if download_media_thumbnail.now(*args): - media.refresh_from_db() - if media.thumb_file_exists: - log.info(f'Copying media thumbnail from: {media.thumb.path} ' - f'to: {media.thumbpath}') - copyfile(media.thumb.path, media.thumbpath) - # If selected, write an NFO file - if media.source.write_nfo: - log.info(f'Writing media NFO file to: {media.nfopath}') - try: - write_text_file(media.nfopath, media.nfoxml) - except PermissionError as e: - log.warn(f'A permissions problem occured when writing the new media NFO file: {e.msg}') - pass + media.copy_thumbnail() + media.write_nfo_file() # Schedule a task to update media servers schedule_media_servers_update() diff --git a/tubesync/sync/templates/sync/tasks-completed.html b/tubesync/sync/templates/sync/tasks-completed.html index 52f576df0..ec2a0aa82 100644 --- a/tubesync/sync/templates/sync/tasks-completed.html +++ b/tubesync/sync/templates/sync/tasks-completed.html @@ -1,4 +1,4 @@ -{% extends 'base.html' %} +{% extends 'base.html' %}{% load filters %} {% block headtitle %}Tasks - Completed{% endblock %} @@ -14,20 +14,18 @@
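With copy_thumbnail() and write_nfo_file() moved onto the Media model (see the media.py hunk earlier), the tail of download_media() reduces to the delegating calls shown above; sketched as a plain call sequence:

```python
# inside download_media(), after the file has downloaded successfully
media.download_finished(format_str, container, filepath)
save_model(media)
media.copy_thumbnail()    # returns the copied path, or None when skipped
media.write_nfo_file()    # returns the bytes written, or None when disabled
schedule_media_servers_update()
```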

Completed tasks

    {% for task in tasks %}
-      {% if task.has_error %}
+      {% if task.has_error %}
        {{ task.verbose_name }}
        Queue: "{{ task.queue }}"
        Error: "{{ task.error_message }}"
-        Task ran at {{ task.run_at|date:'Y-m-d H:i:s' }}
-
      {% else %}
-        {{ task.verbose_name }}
        Queue: "{{ task.queue }}"
-        Task ran at {{ task.run_at|date:'Y-m-d H:i:s' }}
-
      {% endif %}
+      Task locked for: {{ task.run_at|sub:task.locked_at|timedelta }}
+      Task locked at {{ task.locked_at|date:'Y-m-d H:i:s' }}
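The new "Task locked for" line chains the existing sub filter with the timedelta filter added in templatetags/filters.py below; reproduced outside a template (invented timestamps), the computation is:

```python
from datetime import datetime, timezone

locked_at = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
run_at = datetime(2025, 1, 1, 12, 3, 42, tzinfo=timezone.utc)

delta = run_at - locked_at     # what task.run_at|sub:task.locked_at produces
print(delta.total_seconds())   # 222.0
# |timedelta with no argument then renders this as '00:03:42'
```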
+ Task ended at {{ task.run_at|date:'Y-m-d H:i:s' }} {% empty %} There have been no completed tasks{% if source %} that match the specified source filter{% endif %}. diff --git a/tubesync/sync/templatetags/filters.py b/tubesync/sync/templatetags/filters.py index 444969e90..ee8f9aa24 100644 --- a/tubesync/sync/templatetags/filters.py +++ b/tubesync/sync/templatetags/filters.py @@ -1,5 +1,6 @@ from django import template from django.template.defaultfilters import filesizeformat +from math import ceil register = template.Library() @@ -23,3 +24,64 @@ def sub(value, arg): except Exception: return "" + +@register.filter +def timedelta(value, arg=None, /, *, fmt_2=None): + if hasattr(value, 'total_seconds') and callable(value.total_seconds): + seconds_total = value.total_seconds() + elif hasattr(value, 'seconds'): + seconds_total = value.seconds + (value.days * 24 * 60 * 60) + else: + seconds_total = value + + dynamic_arg = False + if arg is None: + if seconds_total < 1.0: + return f'{seconds_total:.6f} seconds' + dynamic_arg = True + arg = '{hours2}:{minutes2}:{seconds2}' + + if fmt_2 is None: + fmt_2 = '{:02d}' + + seconds_total = ceil(seconds_total) + seconds = seconds_total % 60 + + minutes_total = seconds_total // 60 + minutes = minutes_total % 60 + + hours_total = minutes_total // 60 + hours = hours_total % 24 + + days_total = hours_total // 24 + days = days_total % 365 + + years_total = days_total // 365 + years = years_total + + if dynamic_arg: + prefix_years = prefix_days = '' + if years_total > 0: + prefix_years = '{years_total} years, ' + if prefix_years and days_total > 0: + prefix_days = '{days} days, ' + elif days_total > 0: + prefix_days = '{days_total} days, ' + arg = prefix_years + prefix_days + arg + + return arg.format(**{ + 'seconds': seconds, + 'seconds2': fmt_2.format(seconds), + 'minutes': minutes, + 'minutes2': fmt_2.format(minutes), + 'hours': hours, + 'hours2': fmt_2.format(hours), + 'days': days, + 'years': years, + 'seconds_total': seconds_total, + 'minutes_total': minutes_total, + 'hours_total': hours_total, + 'days_total': days_total, + 'years_total': years_total, + }) + diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index 47089673e..9d5ce9910 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -354,6 +354,7 @@ def test_media(self): # Add some media test_minimal_metadata = ''' { + "channel_id":"testkey", "thumbnail":"https://example.com/thumb.jpg", "formats": [{ "format_id":"251", diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py index fc7874fd7..cbd14eab8 100644 --- a/tubesync/sync/utils.py +++ b/tubesync/sync/utils.py @@ -2,11 +2,11 @@ import re import math from copy import deepcopy -from operator import attrgetter, itemgetter from pathlib import Path from tempfile import NamedTemporaryFile import requests from PIL import Image +from common.utils import list_of_dictionaries from django.conf import settings from urllib.parse import urlsplit, parse_qs from django.forms import ValidationError @@ -95,20 +95,6 @@ def resize_image_to_height(image, width, height): return image -def glob_quote(filestr): - _glob_specials = { - '?': '[?]', - '*': '[*]', - '[': '[[]', - ']': '[]]', # probably not needed, but it won't hurt - } - - if not isinstance(filestr, str): - raise TypeError(f'filestr must be a str, got "{type(filestr)}"') - - return filestr.translate(str.maketrans(_glob_specials)) - - def file_is_editable(filepath): ''' Checks that a file exists and the file is in an allowed predefined tuple of @@ -130,14 +116,6 @@ def 
file_is_editable(filepath): return False -def mkdir_p(arg_path, mode=0o777): - ''' - Reminder: mode only affects the last directory - ''' - dirpath = Path(arg_path) - return dirpath.mkdir(mode=mode, parents=True, exist_ok=True) - - def write_text_file(filepath, filedata): if not isinstance(filedata, str): raise TypeError(f'filedata must be a str, got "{type(filedata)}"') @@ -162,30 +140,6 @@ def delete_file(filepath): return False -def seconds_to_timestr(seconds): - seconds = seconds % (24 * 3600) - hour = seconds // 3600 - seconds %= 3600 - minutes = seconds // 60 - seconds %= 60 - return '{:02d}:{:02d}:{:02d}'.format(hour, minutes, seconds) - - -def multi_key_sort(iterable, specs, /, use_reversed=False, *, item=False, attr=False, key_func=None): - result = list(iterable) - if key_func is None: - # itemgetter is the default - if item or not (item or attr): - key_func = itemgetter - elif attr: - key_func = attrgetter - for key, reverse in reversed(specs): - result.sort(key=key_func(key), reverse=reverse) - if use_reversed: - return list(reversed(result)) - return result - - def normalize_codec(codec_str): result = str(codec_str).upper() parts = result.split('.') @@ -201,17 +155,6 @@ def normalize_codec(codec_str): return result -def list_of_dictionaries(arg_list, arg_function=lambda x: x): - assert callable(arg_function) - if isinstance(arg_list, list): - def _call_func_with_dict(arg_dict): - if isinstance(arg_dict, dict): - return arg_function(arg_dict) - return arg_dict - return (True, list(map(_call_func_with_dict, arg_list)),) - return (False, arg_list,) - - def _url_keys(arg_dict, filter_func): result = {} if isinstance(arg_dict, dict): diff --git a/tubesync/sync/views.py b/tubesync/sync/views.py index 5f13877f6..493098cdb 100644 --- a/tubesync/sync/views.py +++ b/tubesync/sync/views.py @@ -20,13 +20,13 @@ from django.utils import timezone from django.utils.translation import gettext_lazy as _ from common.timestamp import timestamp_to_datetime -from common.utils import append_uri_params +from common.utils import append_uri_params, mkdir_p, multi_key_sort from background_task.models import Task, CompletedTask from .models import Source, Media, MediaServer from .forms import (ValidateSourceForm, ConfirmDeleteSourceForm, RedownloadMediaForm, SkipMediaForm, EnableMediaForm, ResetTasksForm, ScheduleTaskForm, ConfirmDeleteMediaServerForm, SourceForm) -from .utils import validate_url, delete_file, multi_key_sort, mkdir_p +from .utils import delete_file, validate_url from .tasks import (map_task_to_instance, get_error_message, get_source_completed_tasks, get_media_download_task, delete_task_by_media, index_source_task, diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 7afdf337b..7228b7546 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -7,6 +7,7 @@ import os from common.logger import log +from common.utils import mkdir_p from copy import deepcopy from pathlib import Path from tempfile import TemporaryDirectory @@ -15,7 +16,6 @@ from django.conf import settings from .choices import Val, FileExtension from .hooks import postprocessor_hook, progress_hook -from .utils import mkdir_p import yt_dlp import yt_dlp.patch.check_thumbnails import yt_dlp.patch.fatal_http_errors @@ -213,6 +213,16 @@ class NoDefaultValue: pass # a unique Singleton, that may be checked for later opts.update({ 'sleep_interval_requests': 2 * settings.BACKGROUND_TASK_ASYNC_THREADS, }) + try: + info_json_path = Path(info_json).resolve(strict=False) + except: + pass + else: + 
opts['paths'].update({ + 'infojson': user_set('infojson', opts['paths'], str(info_json_path)) + }) + if 'infojson' not in opts['paths'].keys(): + opts.update({'writeinfojson': False}) if start: log.debug(f'get_media_info: used date range: {opts["daterange"]} for URL: {url}') response = {} diff --git a/tubesync/tubesync/settings.py b/tubesync/tubesync/settings.py index 7a824e7c0..f5689dd13 100644 --- a/tubesync/tubesync/settings.py +++ b/tubesync/tubesync/settings.py @@ -1,5 +1,6 @@ from django import VERSION as DJANGO_VERSION from pathlib import Path +from common.huey import sqlite_tasks from common.utils import getenv @@ -24,6 +25,7 @@ 'django.contrib.humanize', 'sass_processor', 'background_task', + 'django_huey', 'common', 'sync', ] @@ -47,6 +49,22 @@ FORCE_SCRIPT_NAME = None +DJANGO_HUEY = { + 'default': 'network', + 'queues': { + 'database': sqlite_tasks('database'), + 'filesystem': sqlite_tasks('filesystem'), + 'limited': sqlite_tasks('limited', prefix='net'), + 'network': sqlite_tasks('network'), + }, +} +for django_huey_queue in DJANGO_HUEY['queues'].values(): + connection = django_huey_queue.get('connection') + if connection: + filepath = Path('/.' + connection.get('filename') or '').resolve(strict=False) + filepath.parent.mkdir(exist_ok=True, parents=True) + + TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates',
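One nit on the settings.py hunk: in `Path('/.' + connection.get('filename') or '')` the `+` binds tighter than `or`, so the `or ''` fallback never applies and a missing 'filename' key raises TypeError. A sketch of the presumably intended behaviour (helper name invented):

```python
from pathlib import Path

def ensure_queue_db_dir(connection):
    # fall back to '' before concatenating, not after
    filename = connection.get('filename') or ''
    if not filename:
        return None
    # same '/.' prefix as the settings code; resolve() normalises '/./...'
    filepath = Path('/.' + filename).resolve(strict=False)
    # create the parent directory that will hold the sqlite task database
    filepath.parent.mkdir(exist_ok=True, parents=True)
    return filepath
```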