Skip to content

Commit 13e75ae

Browse files
committed
Create token_consistency.yaml
1 parent 4968b3b commit 13e75ae

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Workflow that guards tokens.txt against anything other than appended lines.
name: Check consistency of tokens.txt file

# Run for every push and every pull request.
on: [push, pull_request]

jobs:
  check_tokens:
    runs-on: ubuntu-latest

    steps:
      # v2 of actions/checkout is a deprecated major version; v4 is a
      # drop-in replacement for this usage (default inputs only).
      - name: Checkout code
        uses: actions/checkout@v4
12+
13+
- name: Get previous tokens.txt version
14+
run: |
15+
# Record how tokens.txt in the working tree differs from origin/main so the
# next step can inspect the result in tokens_diff.txt.
tokens_path="chebai/preprocessing/bin/smiles_token/tokens.txt"

git fetch origin main

# The output redirection creates tokens_diff.txt even when `git diff` itself
# fails. Remove the file in that case so that a later `[ -f tokens_diff.txt ]`
# check can tell "nothing to compare against" apart from "no changes".
if ! git diff origin/main -- "$tokens_path" > tokens_diff.txt; then
  rm -f -- tokens_diff.txt
  echo "No previous tokens.txt found"
fi
17+
18+
- name: Check for deleted or added lines in tokens.txt
19+
run: |
20+
#######################################
# Print the content of all hunk lines in a unified diff that carry the given
# sign, with the sign stripped.
# Only lines after a "@@" hunk header are considered, so the "--- a/..." and
# "+++ b/..." file headers are never confused with removed/added tokens such
# as "--" or "++".
# Arguments: $1 - sign to select: "-" (removed) or "+" (added)
#            $2 - path to the unified diff file
# Outputs:   matching lines, without their sign, on stdout
#######################################
_tokens_hunk_lines() {
  awk -v sign="$1" '
    /^diff --git / { in_hunk = 0; next }
    /^@@/          { in_hunk = 1; next }
    in_hunk && substr($0, 1, 1) == sign { print substr($0, 2) }
  ' "$2"
}

#######################################
# Verify that tokens.txt was only appended to.
# Reads a unified diff of tokens.txt and fails when a line was removed or
# when the added lines are not exactly the trailing lines of the current file.
# Arguments: $1 - path to the diff file (default: tokens_diff.txt)
#            $2 - path to the current tokens file
#                 (default: chebai/preprocessing/bin/smiles_token/tokens.txt)
# Outputs:   status messages on stdout, error details on stderr
# Returns:   0 when there is no diff file, no change, or only appended lines;
#            1 when lines were deleted or added anywhere but at the end
#######################################
check_tokens_diff() {
  local diff_file=${1:-tokens_diff.txt}
  local tokens_file=${2:-chebai/preprocessing/bin/smiles_token/tokens.txt}
  local deleted_lines added_lines num_added_lines last_lines non_appended_lines

  if [ ! -f "$diff_file" ]; then
    echo "No previous version of tokens.txt found."
    return 0
  fi

  # Check for deleted lines (hunk lines starting with '-').
  deleted_lines=$(_tokens_hunk_lines '-' "$diff_file")
  if [ -n "$deleted_lines" ]; then
    echo "Error: Lines have been deleted from tokens.txt." >&2
    # printf '%s' keeps backslashes in token text literal (echo -e would not).
    printf 'Deleted Lines: \n%s\n' "$deleted_lines" >&2
    return 1
  fi

  # Check for added lines (hunk lines starting with '+').
  added_lines=$(_tokens_hunk_lines '+' "$diff_file")
  if [ -z "$added_lines" ]; then
    echo "tokens.txt validation successful: No lines were deleted, and no new lines were added."
    return 0
  fi

  # Count how many lines have been added.
  num_added_lines=$(printf '%s\n' "$added_lines" | grep -c '')

  # Get the last `n` lines (n = num_added_lines) of tokens.txt.
  last_lines=$(tail -n "$num_added_lines" -- "$tokens_file")

  # The added lines must be exactly the tail of the file.
  if [ "$added_lines" != "$last_lines" ]; then
    # Lines that were added but are not part of the file's tail. diff exits
    # non-zero when its inputs differ, which is expected here.
    non_appended_lines=$(
      diff <(printf '%s\n' "$added_lines") <(printf '%s\n' "$last_lines") \
        | grep '^<' \
        | sed 's/^< //'
    ) || true

    echo "Error: New lines have been added, but they are not at the end of tokens.txt." >&2
    printf 'Added lines that are not at end of file: \n%s\n' "$non_appended_lines" >&2
    return 1
  fi

  echo "tokens.txt validation successful: No lines were deleted, and new lines were correctly appended at the end."
  return 0
}

check_tokens_diff || exit 1

0 commit comments

Comments
 (0)