Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ Before you submit a pull request, ensure you follow the testing and linting inst
* [Task] 3.40 or higher
* [uv] 0.7.10 or higher

### macOS

The exported tasks rely on GNU utilities that are not always pre-installed on macOS. You may need to
install the following Homebrew packages and add their executables to your PATH:

* [coreutils]\: `md5sum`
* [gnu-tar]\: `gtar`

## Testing

To run all tests:
Expand Down Expand Up @@ -42,5 +50,7 @@ To clean up any generated files:
task clean
```

[coreutils]: https://formulae.brew.sh/formula/coreutils
[gnu-tar]: https://formulae.brew.sh/formula/gnu-tar
[Task]: https://taskfile.dev/
[uv]: https://docs.astral.sh/uv
126 changes: 88 additions & 38 deletions exports/taskfiles/utils/checksum.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
version: "3"

set: ["u", "pipefail"]
shopt: ["globstar"]

tasks:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

# Computes the checksum of the paths matched by the given include/exclude patterns and saves the
# result to `CHECKSUM_FILE`. The calling task can set `FAIL` to "false" to continue even if the
# checksum computation fails.
#
# @param {string} CHECKSUM_FILE
# @param {string[]} INCLUDE_PATTERNS Path wildcard patterns to compute the checksum for.
# @param {string[]} [EXCLUDE_PATTERNS] Path wildcard patterns, relative to any `INCLUDE_PATTERNS`,
# to exclude from the checksum.
# @param {string} [FAIL="true"] If set to "false", the task will not fail.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# @param {string} [FAIL="true"] If set to "false" the task will not fail.
# @param {string} [FAIL="true"] If set to "false", the task will not fail.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to change this into IGNORE_ERROR with default = false?

  1. the name FAIL can be confusing. i feel IGNORE_ERROR can be more self-explanatory
  2. it's usually better to use false as the default value of any Booleans
  3. we can early return right after rm -f "{{.CHECKSUM_FILE}}"

compute:
  desc: "Tries to compute a checksum for the given paths and output it to a file."
  internal: true
  # Label each invocation by the checksum file it writes.
  label: "{{.TASK}}-{{.CHECKSUM_FILE}}"
  silent: true
  vars:
    # Fall back to an empty list so ranging over the exclude patterns is a no-op when the caller
    # passes none.
    EXCLUDE_PATTERNS:
      ref: "default (list) .EXCLUDE_PATTERNS"
    # Normalize `FAIL`: only the literal string "false" disables failing; anything else is "true".
    FAIL: "{{if eq \"false\" .FAIL}}false{{else}}true{{end}}"

    # On macOS (darwin) the archiver is invoked as `gtar`; everywhere else as `tar`.
    ARCHIVER: "{{if eq OS \"darwin\"}}gtar{{else}}tar{{end}}"
    # Temporary file, next to the checksum file, that receives the archiver's stderr.
    TMP_ERR_LOG: "{{.CHECKSUM_FILE}}.log.tmp"
  # Declare the required variables exactly once; a repeated `vars` key is a duplicate mapping key.
  requires:
    vars:
      - "CHECKSUM_FILE"
      - "INCLUDE_PATTERNS"
  cmds:
    # Make sure the directory that will hold the checksum file exists.
    - "mkdir -p '{{ dir .CHECKSUM_FILE }}'"
# We explicitly set `--no-anchored` and `--wildcards` to make the inclusion behaviour match
Expand All @@ -25,58 +36,97 @@ tasks:
# input patterns cannot be quoted since they're evaluated by the shell and the results are
# passed to `tar` as arguments. If the input patterns are passed to `tar` with quotes, the
# pattern won't be evaluated and will instead be treated literally.
# Remove the temporary stderr log however the task ends.
- defer: "rm -f '{{.TMP_ERR_LOG}}'"
# Archive the included paths to stdout and hash the stream. Owner, group and mtime are pinned and
# entries are sorted by name; the archiver's stderr is captured in `TMP_ERR_LOG` so it can be
# reported on failure. The Taskfile-level `pipefail` option lets an archiver failure fail the
# pipeline even though `md5sum` is the last command in it.
#
# The pipeline is indented one extra level beneath `if !` so the condition reads apart from the
# `then` branch. Each `{{- ...}}` action trims the whitespace before it, so the indentation of the
# template lines does not affect the rendered command.
- |-
  if ! \
      {{.ARCHIVER}} \
          --create \
          --file - \
          --group 0 \
          --mtime "UTC 1970-01-01" \
          --numeric-owner \
          --owner 0 \
          --sort name \
          --no-anchored \
          --wildcards \
          {{- range .EXCLUDE_PATTERNS}}
          --exclude="{{.}}" \
          {{- end}}
          {{- range .INCLUDE_PATTERNS}}
          {{.}} \
          {{- end}}
          2> "{{.TMP_ERR_LOG}}" \
      | md5sum > "{{.CHECKSUM_FILE}}" \
  ; then
    # Never leave a checksum behind that was computed from a failed archive.
    rm -f "{{.CHECKSUM_FILE}}"
    {{- if eq "true" .FAIL}}
    # The log path is quoted so that a path containing spaces is still read as a single file.
    printf "[{{.TASK}} error] failed with:\n%s\n" "$(cat '{{.TMP_ERR_LOG}}')"
    exit 1
    {{- else}}
    exit 0
    {{- end}}
  fi

# Validates that the checksum computed from the given include/exclude path patterns matches the
# reference checksum stored in the given file. If validation fails, the checksum file is deleted,
# but the task succeeds so dependent tasks that list the checksum file under `generates` will
# rerun automatically. The calling task can set `FAIL` to "true" to make this task fail when the
# validation is expected to succeed.
#
# @param {string} CHECKSUM_FILE
# @param {string[]} INCLUDE_PATTERNS Path wildcard patterns to validate the checksum for.
# @param {string[]} [EXCLUDE_PATTERNS] Path wildcard patterns, relative to any `INCLUDE_PATTERNS`,
# to exclude from the checksum.
# @param {string} [FAIL="false"] If set to "true", the task fails when validation fails (an include
# path does not exist or the checksums mismatch).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would FAIL_ON_ERROR be more self-explanatory?

validate:
  desc: "Validates the checksum of the given directory matches the checksum in the given file, or
    deletes the checksum file otherwise."
  internal: true
  # Label each invocation by the checksum file it validates.
  label: "{{.TASK}}-{{.CHECKSUM_FILE}}"
  silent: true
  vars:
    # Normalize `FAIL`: only the literal string "true" enables failing; anything else is "false".
    FAIL: "{{if eq \"true\" .FAIL}}true{{else}}false{{end}}"
    # Receives the freshly computed checksum that is compared against `CHECKSUM_FILE`.
    TMP_CHECKSUM_FILE: "{{.CHECKSUM_FILE}}.tmp"
    # Receives the reason for a validation failure so it can be printed when `FAIL` is "true".
    TMP_ERR_LOG: "{{.CHECKSUM_FILE}}.log.tmp"
  requires:
    vars:
      - "CHECKSUM_FILE"
      - "INCLUDE_PATTERNS"
  cmds:
    # Compute the current checksum into the temporary file. `FAIL` is "false" so that a failed
    # computation does not fail this task directly; the comparison below then handles it.
    - task: "compute"
      vars:
        CHECKSUM_FILE: "{{.TMP_CHECKSUM_FILE}}"
        INCLUDE_PATTERNS:
          ref: ".INCLUDE_PATTERNS"
        EXCLUDE_PATTERNS:
          ref: ".EXCLUDE_PATTERNS"
        FAIL: "false"
    # Remove both temporary files however the task ends.
    - defer: |-
        rm -f "{{.TMP_CHECKSUM_FILE}}"
        rm -f "{{.TMP_ERR_LOG}}"
    # Check that all include paths exist and that the checksums match. On failure, either report
    # the error (`FAIL` is "true") or delete the checksum file.
    - |-
      (
        {{- range .INCLUDE_PATTERNS}}
        for path in {{.}}; do
          # A brace group (not a subshell) is used so that `exit 1` leaves the enclosing subshell
          # immediately. With a subshell the loop would carry on and a later path that exists
          # would hide the failure.
          test -e "${path}" \
            || {
              echo "Include path does not exist: ${path}" > "{{$.TMP_ERR_LOG}}"
              exit 1
            }
        done && \
        {{- end}}
        (
          cmp -s "{{.TMP_CHECKSUM_FILE}}" "{{.CHECKSUM_FILE}}" \
            || (
              echo "cmp failed for '{{.TMP_CHECKSUM_FILE}}' '{{.CHECKSUM_FILE}}'" \
                > "{{.TMP_ERR_LOG}}"
              exit 1
            )
        )
      ) \
        || (
          {{- if eq "true" .FAIL}}
          # The log path is quoted so that a path containing spaces is still read as a single file.
          printf "[{{.TASK}} error] failed with:\n%s\n" "$(cat '{{.TMP_ERR_LOG}}')"
          exit 1
          {{- else}}
          rm -f "{{.CHECKSUM_FILE}}"
          {{- end}}
        )

148 changes: 148 additions & 0 deletions taskfiles/checksum/tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
version: "3"

includes:
checksum:
internal: true
taskfile: "../../exports/taskfiles/utils/checksum.yaml"

tasks:
# Runs each of the checksum utility tests, one after another.
default:
  cmds:
    - task: "checksum-test-rerun"
    - task: "checksum-test-skip"
    - task: "checksum-test-update"

# Checks that `create-dir-with-checksum` runs again, and produces a different checksum, after its
# checksum file has been moved away.
checksum-test-rerun:
  vars:
    # Per-test output directory; ":" in the task name is replaced with "#".
    OUTPUT_DIR: "{{.G_OUTPUT_DIR}}/{{.TASK | replace \":\" \"#\"}}"
    SRC_DIR: "{{.OUTPUT_DIR}}/src"

    CHECKSUM_FILE: "{{.SRC_DIR}}.md5"
    CHECKSUM_FILE_REF: "{{.CHECKSUM_FILE}}.ref"
    FILE_0: "{{.SRC_DIR}}/0.txt"
    FILE_1: "{{.SRC_DIR}}/1.txt"
  cmds:
    - task: "checksum-test-init"
      vars:
        OUTPUT_DIR: "{{.OUTPUT_DIR}}"

    # First run: creates FILE_0 and its checksum, which is then moved aside as the reference.
    - task: "create-dir-with-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        FILE_PATH: "{{.FILE_0}}"
    - "mv '{{.CHECKSUM_FILE}}' '{{.CHECKSUM_FILE_REF}}'"

    # Second run: the checksum file is gone, so the task is expected to run again with FILE_1.
    - task: "create-dir-with-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        FILE_PATH: "{{.FILE_1}}"

    # The second run must have replaced FILE_0 with FILE_1 and produced a checksum that differs
    # from the reference.
    - "test ! -e '{{.FILE_0}}'"
    - "test -e '{{.FILE_1}}'"
    - |-
      if cmp -s '{{.CHECKSUM_FILE}}' '{{.CHECKSUM_FILE_REF}}'; then
        exit 1
      fi
# Checks that `create-dir-with-checksum` is skipped on a second call when its checksum file is
# left in place.
checksum-test-skip:
  vars:
    # Per-test output directory; ":" in the task name is replaced with "#".
    OUTPUT_DIR: "{{.G_OUTPUT_DIR}}/{{.TASK | replace \":\" \"#\"}}"
    SRC_DIR: "{{.OUTPUT_DIR}}/src"

    CHECKSUM_FILE: "{{.SRC_DIR}}.md5"
    # Holds the checksum file's modification time as recorded after the first run.
    CHECKSUM_MOD_TS: "{{.CHECKSUM_FILE}}-mod-ts.txt"
    FILE_0: "{{.SRC_DIR}}/0.txt"
    FILE_1: "{{.SRC_DIR}}/1.txt"
  cmds:
    - task: "checksum-test-init"
      vars:
        OUTPUT_DIR: "{{.OUTPUT_DIR}}"

    # First run: creates FILE_0 and its checksum; record when the checksum file was written.
    - task: "create-dir-with-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        FILE_PATH: "{{.FILE_0}}"
    - "date -r '{{.CHECKSUM_FILE}}' > '{{.CHECKSUM_MOD_TS}}'"

    # Second run: nothing changed, so the task is expected to be skipped.
    - task: "create-dir-with-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        FILE_PATH: "{{.FILE_1}}"

    # The second call must have been skipped: FILE_0 is still there, FILE_1 was never created, and
    # the checksum file's modification time is unchanged.
    - "test -e '{{.FILE_0}}'"
    - "test ! -e '{{.FILE_1}}'"
    - "cmp -s '{{.CHECKSUM_MOD_TS}}' <(date -r '{{.CHECKSUM_FILE}}')"

# Checks that after the sources change and the checksum is recomputed with `checksum:compute`,
# `create-dir-with-checksum` is skipped and the recomputed checksum is kept.
checksum-test-update:
  vars:
    # Per-test output directory; ":" in the task name is replaced with "#".
    OUTPUT_DIR: "{{.G_OUTPUT_DIR}}/{{.TASK | replace \":\" \"#\"}}"
    SRC_DIR: "{{.OUTPUT_DIR}}/src"

    CHECKSUM_FILE: "{{.SRC_DIR}}.md5"
    # Copy of the checksum taken after the first run.
    CHECKSUM_FILE_REF0: "{{.CHECKSUM_FILE}}.ref0"
    # Copy of the checksum taken after FILE_0 was modified and the checksum recomputed.
    CHECKSUM_FILE_REF1: "{{.CHECKSUM_FILE}}.ref1"
    FILE_0: "{{.SRC_DIR}}/0.txt"
    FILE_1: "{{.SRC_DIR}}/1.txt"
  cmds:
    - task: "checksum-test-init"
      vars:
        OUTPUT_DIR: "{{.OUTPUT_DIR}}"

    # First run: creates FILE_0 and its checksum; keep a copy of that checksum.
    - task: "create-dir-with-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        FILE_PATH: "{{.FILE_0}}"
    - "cp '{{.CHECKSUM_FILE}}' '{{.CHECKSUM_FILE_REF0}}'"

    # Change FILE_0's contents (to the first checksum), recompute the checksum, and keep a copy.
    - "cat '{{.CHECKSUM_FILE}}' > '{{.FILE_0}}'"
    - task: "checksum:compute"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        INCLUDE_PATTERNS:
          - "{{.SRC_DIR}}"
    - "cp '{{.CHECKSUM_FILE}}' '{{.CHECKSUM_FILE_REF1}}'"

    # Second run: the checksum now matches the modified sources, so the task should be skipped.
    - task: "create-dir-with-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        FILE_PATH: "{{.FILE_1}}"

    # The second call must have been skipped (FILE_0 kept with its modified contents, FILE_1 never
    # created), and the checksum must equal the recomputed one and differ from the first one.
    - "test -e '{{.FILE_0}}'"
    - "test ! -e '{{.FILE_1}}'"
    - "cmp -s '{{.FILE_0}}' '{{.CHECKSUM_FILE_REF0}}'"
    - "cmp -s '{{.CHECKSUM_FILE}}' '{{.CHECKSUM_FILE_REF1}}'"
    - |-
      if cmp -s '{{.CHECKSUM_FILE}}' '{{.CHECKSUM_FILE_REF0}}'; then
        exit 1
      fi
# Gives a test a clean slate by deleting and recreating its output directory.
#
# @param {string} OUTPUT_DIR Directory to recreate.
checksum-test-init:
  internal: true
  requires:
    vars:
      - "OUTPUT_DIR"
  cmds:
    - "rm -rf '{{.OUTPUT_DIR}}'"
    - "mkdir -p '{{.OUTPUT_DIR}}'"

# Recreates the directory that contains `FILE_PATH` so that it holds only an empty `FILE_PATH`,
# then writes that directory's checksum to `CHECKSUM_FILE`.
#
# @param {string} CHECKSUM_FILE File that stores the directory's checksum.
# @param {string} FILE_PATH File to create; its parent directory is what gets checksummed.
create-dir-with-checksum:
  internal: true
  vars:
    DIR: "{{dir .FILE_PATH}}"
  requires:
    vars:
      - "CHECKSUM_FILE"
      - "FILE_PATH"
  sources:
    - "{{.TASKFILE}}"
  generates:
    - "{{.CHECKSUM_FILE}}"
  deps:
    # Validate the existing checksum before the commands below are considered.
    - task: "checksum:validate"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        INCLUDE_PATTERNS:
          - "{{.DIR}}"
  cmds:
    - |-
      rm -rf "{{.DIR}}"
      mkdir -p "{{.DIR}}"
      touch "{{.FILE_PATH}}"
    - task: "checksum:compute"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        INCLUDE_PATTERNS:
          - "{{.DIR}}"
6 changes: 6 additions & 0 deletions taskfiles/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ version: "3"

includes:
boost: "boost/tests.yaml"
checksum: "checksum/tests.yaml"
remote: "remote/tests.yaml"
ystdlib-py: "ystdlib-py/tests.yaml"

Expand All @@ -10,13 +11,18 @@ tasks:
internal: true
cmds:
- task: "boost"
- task: "checksum"
- task: "remote"
- task: "ystdlib-py"

# Runs the `test` task of the included `boost` test Taskfile.
boost:
  cmds:
    - task: "boost:test"

# Runs the `default` task of the included `checksum` test Taskfile.
checksum:
  cmds:
    - task: "checksum:default"

remote:
cmds:
- task: "remote:default"
Expand Down