Skip to content

Commit 779ca55

Browse files
authored
Create clean_validate_merge_ttl.yml
1 parent 69b9f31 commit 779ca55

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
name: Clean, Merge, and Upload TTL Files

# Runs when a person TTL file or this workflow definition changes.
# Restricted to `main` because the commit step pulls from and pushes to
# `main` explicitly; a push to any other branch would otherwise start a job
# that cannot push its result.
on:
  push:
    branches:
      - main
    paths:
      - 'persons/*.ttl'
      - '.github/workflows/clean_validate_merge_ttl.yml'
8+
9+
jobs:
  # Single job: clean the source TTL files, validate them, merge them into
  # one file and commit that file back to the repository.
  clean-merge-upload:
    runs-on: ubuntu-latest

    # The job pushes a commit with GITHUB_TOKEN, which needs write access to
    # repository contents. Declaring `permissions` sets every scope that is
    # not listed here to `none`.
    permissions:
      contents: write

    env:
      # Target S3 bucket; only read by the S3 upload command.
      BUCKET_NAME: ${{ secrets.AWS_TRIPLESTORE_BUCKET_NAME }}
      # Name of the merged Turtle file produced by this job.
      MERGED_FILE: persons.ttl

    steps:
18+
- name: Checkout repository
  # v4 is the drop-in successor of v3, which runs on the deprecated
  # Node 16 action runtime.
  uses: actions/checkout@v4
  with:
    # Credentials are persisted in the local git config, so the later
    # `git push` in this job authenticates with this token.
    token: ${{ secrets.GITHUB_TOKEN }}
22+
23+
- name: Set up Git identity
  # Author/committer identity used by the commit created later in this job.
  run: |
    git config --global user.email "github-actions[bot]@users.noreply.github.com"
    git config --global user.name "github-actions[bot]"
27+
28+
- name: Install Apache Jena (riot)
  env:
    # Single place to bump the Jena release; quoted so it stays a string.
    JENA_VERSION: "4.10.0"
  run: |
    sudo apt-get update
    sudo apt-get install -y default-jre
    # Download from the permanent Apache archive: the dlcdn mirror only
    # carries current releases, so a pinned version eventually disappears
    # from it.
    wget -q "https://archive.apache.org/dist/jena/binaries/apache-jena-${JENA_VERSION}.tar.gz"
    tar -xzf "apache-jena-${JENA_VERSION}.tar.gz"
    # Expose the Jena command-line tools (including riot) to later steps.
    echo "$PWD/apache-jena-${JENA_VERSION}/bin" >> "$GITHUB_PATH"
35+
36+
- name: Clean TTL files
  # Writes a cleaned copy of every *.ttl file found under ttl/ into cleaned/:
  #   * spaces inside <...> spans are percent-encoded as %20
  #   * the language tag @en-x-srp1 is rewritten to @en
  #   * every "|" character is removed
  # NOTE(review): input is read from ttl/ while the workflow triggers on
  # persons/*.ttl - confirm that ttl/ is the intended source directory.
  # NOTE(review): output files are keyed by basename, so files with the same
  # name in different subdirectories overwrite each other.
  run: |
    mkdir -p cleaned
    # NUL-delimited paths keep file names containing whitespace intact.
    find ttl -name '*.ttl' -print0 | while IFS= read -r -d '' file; do
      fname=$(basename "$file")
      awk '
        {
          # Walk the line left to right. Text already handled moves to
          # "done" so every <...> span is visited exactly once; matching
          # against the rewritten line again would find the same span
          # forever.
          rest = $0
          done = ""
          while (match(rest, /<[^>]*>/)) {
            uri = substr(rest, RSTART, RLENGTH)
            gsub(/ /, "%20", uri)
            done = done substr(rest, 1, RSTART - 1) uri
            rest = substr(rest, RSTART + RLENGTH)
          }
          line = done rest
          gsub(/@en-x-srp1/, "@en", line)
          gsub(/\|/, "", line)
          print line
        }' "$file" > "cleaned/$fname"
    done
54+
55+
- name: Validate TTL files with riot
  # Parses every cleaned file with Apache Jena riot; the step fails on the
  # first file that does not parse.
  run: |
    # Without nullglob an unmatched pattern would be handed to riot as a
    # literal file name; fail with a clear message instead.
    shopt -s nullglob
    files=(cleaned/*.ttl)
    if [ "${#files[@]}" -eq 0 ]; then
      echo "No cleaned TTL files found to validate" >&2
      exit 1
    fi
    for file in "${files[@]}"; do
      echo "Validating $file..."
      # --validate checks the file without echoing every parsed triple
      # into the job log.
      riot --validate "$file"
    done
61+
62+
- name: Merge cleaned TTL files
  # Concatenates all cleaned files, in glob order, into $MERGED_FILE in the
  # workspace root.
  run: |
    cat cleaned/*.ttl > "${MERGED_FILE}"
    printf 'Merged file created: %s\n' "${MERGED_FILE}"
66+
67+
- name: Commit merged file to repo
  # Copies the merged file into persons/ and pushes it to main when its
  # content changed.
  run: |
    # Pick up commits that landed on main after the checkout so the push
    # below applies cleanly.
    git pull origin main
    mkdir -p persons
    cp "$MERGED_FILE" "persons/$MERGED_FILE"
    git add "persons/$MERGED_FILE"
    # Decide explicitly whether there is anything staged, so that a real
    # commit failure stops the job instead of being reported as
    # "No changes to commit".
    if git diff --cached --quiet; then
      echo "No changes to commit"
    else
      git commit -m "Update $MERGED_FILE on $(date +'%Y-%m-%d %H:%M:%S')"
      git push origin main
    fi
74+
75+
# Disabled: upload of the merged file to S3. Both steps below are commented
# out and never run.
# NOTE(review): the session name suggests the role is assumed through GitHub
# OIDC; if so, re-enabling these steps also requires the job to grant the
# `id-token: write` permission - confirm before uncommenting.
# NOTE(review): role-duration-seconds 28800 is 8 hours; presumably the IAM
# role's maximum session duration has to allow that - verify.
#
# - name: Configure AWS credentials
#   uses: aws-actions/configure-aws-credentials@v3
#   with:
#     role-to-assume: ${{ secrets.AWS_LINKED_DATA_ROLE }}
#     aws-region: ${{ secrets.AWS_REGION }}
#     role-duration-seconds: 28800
#     role-session-name: GitHubOIDCS3Uploader
#
# The uploaded object is named after $MERGED_FILE (persons.ttl), not
# merged.ttl as the step name says.
# - name: Upload merged.ttl to S3
#   run: |
#     aws s3 cp "$MERGED_FILE" "s3://$BUCKET_NAME/$MERGED_FILE"

0 commit comments

Comments
 (0)