Skip to content

Commit 6b0bcf0

Browse files
committed
add workflow for token files
- workflow tested in dummy PR #62
1 parent 4968b3b commit 6b0bcf0

File tree

1 file changed

+125
-0
lines changed

1 file changed

+125
-0
lines changed
Lines changed: 125 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,125 @@
1+
name: Check consistency of tokens.txt file

# The `paths` filters below restrict this workflow to pushes and pull requests
# that modify at least one of the listed token files. Within a run, only the
# token files that actually changed are checked, not every file listed under `paths`.
#
# **Note**: to put another token file under this check, add its path to:
#   - the `push` and `pull_request` sections under `on`
#   - `jobs` -> `check_tokens` -> `steps` -> Set global variable for multiple tokens.txt paths -> `TOKENS_FILES`

on:
  push:
    paths:
      - 'chebai/preprocessing/bin/smiles_token/tokens.txt'
      - 'chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt'
      - 'chebai/preprocessing/bin/selfies/tokens.txt'
      - 'chebai/preprocessing/bin/protein_token/tokens.txt'
      - 'chebai/preprocessing/bin/graph_properties/tokens.txt'
      - 'chebai/preprocessing/bin/graph/tokens.txt'
      - 'chebai/preprocessing/bin/deepsmiles_token/tokens.txt'
  pull_request:
    paths:
      - 'chebai/preprocessing/bin/smiles_token/tokens.txt'
      - 'chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt'
      - 'chebai/preprocessing/bin/selfies/tokens.txt'
      - 'chebai/preprocessing/bin/protein_token/tokens.txt'
      - 'chebai/preprocessing/bin/graph_properties/tokens.txt'
      - 'chebai/preprocessing/bin/graph/tokens.txt'
      - 'chebai/preprocessing/bin/deepsmiles_token/tokens.txt'
29+
30+
jobs:
  # Fails the run when a changed tokens.txt file lost lines compared to
  # origin/dev, or gained lines anywhere other than at the end of the file.
  check_tokens:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        # actions/checkout@v2 targets a deprecated Node.js runtime; v4 is the maintained release.
        uses: actions/checkout@v4

      - name: Get list of changed files
        id: changed_files
        run: |
          # Make origin/dev available locally; every comparison in this job is made against it.
          git fetch origin dev

          # Get the list of changed files compared to origin/dev and save them to a file
          # (one path per line; read again by the "Process only changed tokens.txt files" step).
          git diff --name-only origin/dev > changed_files.txt

          # Print the names of changed files on separate lines
          echo "Changed files:"
          while IFS= read -r line; do
            echo "Changed File name : $line"
          done < changed_files.txt

      - name: Set global variable for multiple tokens.txt paths
        run: |
          # All token files that need to be checked must be included here too, same as in `paths`.
          TOKENS_FILES=(
            "chebai/preprocessing/bin/smiles_token/tokens.txt"
            "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt"
            "chebai/preprocessing/bin/selfies/tokens.txt"
            "chebai/preprocessing/bin/protein_token/tokens.txt"
            "chebai/preprocessing/bin/graph_properties/tokens.txt"
            "chebai/preprocessing/bin/graph/tokens.txt"
            "chebai/preprocessing/bin/deepsmiles_token/tokens.txt"
          )
          # The array is flattened into one space-separated string for later steps,
          # so the paths above must not contain whitespace.
          echo "TOKENS_FILES=${TOKENS_FILES[*]}" >> "$GITHUB_ENV"

      - name: Process only changed tokens.txt files
        run: |
          # Convert the space-separated TOKENS_FILES environment variable back into an array.
          # `read -a` splits on whitespace without the glob expansion an unquoted `( ... )` would perform.
          read -r -a TOKENS_FILES <<< "${TOKENS_FILES}"

          # Iterate over each token file path
          for TOKENS_FILE_PATH in "${TOKENS_FILES[@]}"; do
            # Check if the current token file path is in the list of changed files.
            # -F: treat the path literally (no regex), -x: match the whole line only.
            if ! grep -Fxq -- "$TOKENS_FILE_PATH" changed_files.txt; then
              echo "$TOKENS_FILE_PATH was not changed, skipping."
              continue
            fi

            echo "----------------------- Processing $TOKENS_FILE_PATH -----------------------"

            # A file that does not exist on origin/dev has no earlier version to compare with.
            # stderr is silenced on purpose: git reports the missing path there, which is the expected case.
            if ! git cat-file -e "origin/dev:$TOKENS_FILE_PATH" 2>/dev/null; then
              echo "No previous version of $TOKENS_FILE_PATH found."
              continue
            fi

            # Diff against the previous tokens.txt version.
            # origin/dev was already fetched by the "Get list of changed files" step.
            git diff origin/dev -- "$TOKENS_FILE_PATH" > tokens_diff.txt

            # Check for deleted lines (lines starting with '-').
            # Only lines after the first hunk header (@@) are content; this skips the
            # '---'/'+++' file headers without discarding tokens that start with '--' or '++'.
            deleted_lines=$(awk '/^@@/ { in_hunk = 1; next } in_hunk && substr($0, 1, 1) == "-" { print substr($0, 2) }' tokens_diff.txt)
            if [[ -n "$deleted_lines" ]]; then
              echo "Error: Lines have been deleted from $TOKENS_FILE_PATH."
              # printf instead of `echo -e`, so backslashes inside tokens are printed verbatim.
              printf 'Deleted Lines: \n%s\n' "$deleted_lines"
              exit 1
            fi

            # Check for added lines (lines starting with '+')
            added_lines=$(awk '/^@@/ { in_hunk = 1; next } in_hunk && substr($0, 1, 1) == "+" { print substr($0, 2) }' tokens_diff.txt)
            if [[ -z "$added_lines" ]]; then
              echo "$TOKENS_FILE_PATH validation successful: No lines were deleted, and no new lines were added."
              continue
            fi

            # Count how many lines have been added
            num_added_lines=$(printf '%s\n' "$added_lines" | wc -l)

            # Get last `n` lines (equal to num_added_lines) of tokens.txt
            last_lines=$(tail -n "$num_added_lines" -- "$TOKENS_FILE_PATH")

            # Check if the added lines are at the end of the file
            if [[ "$added_lines" != "$last_lines" ]]; then
              # Find lines that were added but not appended at the end of the file.
              # `|| true`: an empty result must not abort the step before the error is reported.
              non_appended_lines=$(diff <(printf '%s\n' "$added_lines") <(printf '%s\n' "$last_lines") | grep '^<' | sed 's/^< //' || true)

              echo "Error: New lines have been added to $TOKENS_FILE_PATH, but they are not at the end of the file."
              printf 'Added lines that are not at the end of the file: \n%s\n' "$non_appended_lines"
              exit 1
            fi

            echo "$TOKENS_FILE_PATH validation successful: No lines were deleted, and new lines were correctly appended at the end."
          done

0 commit comments

Comments
 (0)