Skip to content

Commit a456bed

Browse files
Add a github workflow for updating language data
* Also update the Readme
1 parent 8cefa89 commit a456bed

File tree

2 files changed

+286
-6
lines changed

2 files changed

+286
-6
lines changed
Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
name: Update Embedded Writing System Data

# The workflow pushes a branch with the refreshed data files and opens a pull
# request for it, so it needs write access to both contents and pull requests.
permissions:
  contents: write
  pull-requests: write

# Manual trigger only: the person dispatching the run picks the data source
# and which of the two embedded files should be refreshed.
on:
  workflow_dispatch:
    inputs:
      use_staging:
        description: 'Use SLDR staging data for testing'
        type: boolean
        default: false
        required: false
      update_langtags:
        description: 'Update langtags.json'
        type: boolean
        default: true
        required: false
      update_iana:
        description: 'Update ianaSubtagRegistry.txt'
        type: boolean
        default: true
        required: false

# Download locations shared by every job in this workflow.
env:
  LANGTAGS_PRODUCTION_URL: "https://ldml.api.sil.org/index.html?query=langtags&ext=json"
  LANGTAGS_STAGING_URL: "https://ldml.api.sil.org/index.html?query=langtags&ext=json&staging=1"
  IANA_URL: "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry"
28+
29+
jobs:
  # Downloads the requested upstream files and compares them byte-for-byte with
  # the copies embedded in the repository. The remaining jobs only run when at
  # least one file differs.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      has_changes: ${{ steps.changes.outputs.has_changes }}
      has_langtags_changes: ${{ steps.changes.outputs.has_langtags_changes }}
      has_iana_changes: ${{ steps.changes.outputs.has_iana_changes }}
      langtags_url: ${{ steps.changes.outputs.langtags_url }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          fetch-depth: 0

      - name: Download and check for changes
        id: changes
        shell: bash
        run: |
          has_changes=false
          has_langtags_changes=false
          has_iana_changes=false
          langtags_url=""

          if [[ "${{ github.event.inputs.update_langtags }}" == "true" ]]; then
            if [[ "${{ github.event.inputs.use_staging }}" == "true" ]]; then
              url="$LANGTAGS_STAGING_URL"
            else
              url="$LANGTAGS_PRODUCTION_URL"
            fi
            langtags_url="$url"
            echo "Downloading langtags from: $url"
            curl -f -o "langtags.json.new" "$url"

            # Validate JSON format
            if ! jq empty "langtags.json.new" 2>/dev/null; then
              echo "Error: Downloaded langtags file is not valid JSON"
              exit 1
            fi

            if ! cmp -s "SIL.WritingSystems/Resources/langtags.json" "langtags.json.new"; then
              echo "Changes detected in langtags.json"
              has_changes=true
              has_langtags_changes=true
            fi
          fi

          if [[ "${{ github.event.inputs.update_iana }}" == "true" ]]; then
            echo "Downloading IANA registry from: $IANA_URL"
            curl -f -o "ianaSubtagRegistry.txt.new" "$IANA_URL"
            if ! cmp -s "SIL.WritingSystems/Resources/ianaSubtagRegistry.txt" "ianaSubtagRegistry.txt.new"; then
              echo "Changes detected in ianaSubtagRegistry.txt"
              has_changes=true
              has_iana_changes=true
            fi
          fi

          {
            echo "has_changes=$has_changes"
            echo "has_langtags_changes=$has_langtags_changes"
            echo "has_iana_changes=$has_iana_changes"
            echo "langtags_url=$langtags_url"
          } >> "$GITHUB_OUTPUT"

          # The update job is skipped entirely when nothing changed, so the
          # "no changes" outcome has to be reported from this job.
          if [[ "$has_changes" != "true" ]]; then
            {
              echo "## Writing System Data Update Summary"
              echo "- **Action Taken**: No changes detected, no updates needed"
            } >> "$GITHUB_STEP_SUMMARY"
          fi

  # Adds a "Changed" entry to CHANGELOG.md for every file that differs and
  # hands the edited changelog to the next job as a short-lived artifact.
  update-changelog:
    needs: check-changes
    runs-on: ubuntu-latest
    if: needs.check-changes.outputs.has_changes == 'true'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          fetch-depth: 0

      - name: Build changelog entry text
        id: changelog_entry
        shell: bash
        run: |
          entry_text=""
          if [ "${{ needs.check-changes.outputs.has_langtags_changes }}" == "true" ]; then
            entry_text="[SIL.WritingSystems] Updated embedded langtags.json"
          fi
          if [ "${{ needs.check-changes.outputs.has_iana_changes }}" == "true" ]; then
            if [ -n "$entry_text" ]; then
              entry_text="$entry_text"$'\n'"[SIL.WritingSystems] Updated embedded ianaSubtagRegistry.txt"
            else
              entry_text="[SIL.WritingSystems] Updated embedded ianaSubtagRegistry.txt"
            fi
          fi
          # Multi-line values must be written with the heredoc-style delimiter.
          {
            echo "text<<EOF"
            echo "$entry_text"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Add entry to CHANGELOG.md
        uses: claudiodekker/changelog-updater@6d9e21971591cfd515ef8cc71b721b767794afd4
        with:
          section: "Changed"
          entry-text: "${{ steps.changelog_entry.outputs.text }}"

      - name: Upload updated changelog
        uses: actions/upload-artifact@v4
        with:
          name: updated-changelog
          path: CHANGELOG.md
          retention-days: 1

  # Replaces the embedded files with fresh downloads, runs the
  # SIL.WritingSystems tests against them and opens a pull request.
  update-langtags:
    needs: [check-changes, update-changelog]
    runs-on: windows-latest
    if: needs.check-changes.outputs.has_changes == 'true'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          fetch-depth: 0

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '8.0.x'

      - name: Download latest langtags.json
        if: github.event.inputs.update_langtags == 'true' && needs.check-changes.outputs.has_langtags_changes == 'true'
        id: langtags
        shell: bash
        run: |
          if [[ "${{ github.event.inputs.use_staging }}" == "true" ]]; then
            url="$LANGTAGS_STAGING_URL"
          else
            url="$LANGTAGS_PRODUCTION_URL"
          fi
          echo "Downloading from: $url"
          curl -f -o "langtags.json.new" "$url"

          # Validate JSON format
          if ! jq empty "langtags.json.new" 2>/dev/null; then
            echo "Error: Downloaded file is not valid JSON"
            exit 1
          fi

          mv "langtags.json.new" "SIL.WritingSystems/Resources/langtags.json"
          echo "Updated langtags.json"
          echo "url=$url" >> "$GITHUB_OUTPUT"

      - name: Download latest iana language-subtag-registry
        if: github.event.inputs.update_iana == 'true' && needs.check-changes.outputs.has_iana_changes == 'true'
        id: subtagregistry
        shell: bash
        run: |
          echo "Downloading from: $IANA_URL"
          curl -f -o "ianaSubtagRegistry.txt.new" "$IANA_URL"
          mv "ianaSubtagRegistry.txt.new" "SIL.WritingSystems/Resources/ianaSubtagRegistry.txt"
          echo "Updated ianaSubtagRegistry.txt"

      - name: Restore & Build SIL.WritingSystems.Tests
        shell: bash
        run: |
          dotnet build SIL.WritingSystems.Tests -p:TargetFramework=net8.0 -p:Configuration=Release
          # Export the build output directory so the test step can find the test assembly.
          echo "TEST_PATH=$(dotnet msbuild SIL.WritingSystems.Tests/ --getProperty:OutputPath -p:TargetFramework=net8.0 -p:Configuration=Release)" >> "$GITHUB_ENV"

      - name: Test SIL.WritingSystems.Tests
        shell: bash
        run: |
          echo "## Test Summary" >> "$GITHUB_STEP_SUMMARY"
          echo "Executing SIL.WritingSystems.Tests..." >> "$GITHUB_STEP_SUMMARY"

          # GitHub runs `shell: bash` scripts with `-e`, so a bare failing
          # command would abort the script before the failure is recorded.
          # Capture the exit status explicitly instead.
          EXIT_CODE=0
          dotnet test "$TEST_PATH"/SIL.WritingSystems.Tests.dll \
            --no-build \
            --filter "TestCategory!=SkipOnTeamCity" \
            --blame-hang-timeout 5m \
            --logger:"trx;LogFilePrefix=results" \
            --results-directory ./test-results || EXIT_CODE=$?

          if [ "$EXIT_CODE" -eq 0 ]; then
            echo "✅ Tests passed." >> "$GITHUB_STEP_SUMMARY"
          else
            echo "❌ Tests failed." >> "$GITHUB_STEP_SUMMARY"
            exit "$EXIT_CODE"
          fi

      - name: Download updated changelog
        uses: actions/download-artifact@v4
        with:
          name: updated-changelog
          path: .

      # Action inputs are plain strings and are never passed through a shell,
      # so the commit timestamp has to be computed in a script step first.
      - name: Compute update timestamp
        id: timestamp
        shell: bash
        run: echo "value=$(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> "$GITHUB_OUTPUT"

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          # Only commit the data files and the changelog; build and test
          # output left in the working tree must not end up in the PR.
          add-paths: |
            SIL.WritingSystems/Resources/langtags.json
            SIL.WritingSystems/Resources/ianaSubtagRegistry.txt
            CHANGELOG.md
          base: main  # Recommended to use the actual main branch name
          commit-message: |
            Update embedded writing system data

            - Updated: ${{ steps.timestamp.outputs.value }}
            - langtags.json: ${{ needs.check-changes.outputs.has_langtags_changes == 'true' && 'Updated' || 'No changes' }}
            - ianaSubtagRegistry.txt: ${{ needs.check-changes.outputs.has_iana_changes == 'true' && 'Updated' || 'No changes' }}
            - SLDR staging: ${{ github.event.inputs.use_staging }}
          title: "Update embedded writing system data"
          body: |
            ## Automated Writing System Data Update

            This PR updates the embedded writing system data files as described in `SIL.WritingSystems/Readme.md`.

            **Workflow Run:** [View Summary](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})

            **Files Updated:**
            - langtags.json: ${{ needs.check-changes.outputs.has_langtags_changes == 'true' && format('✅ Updated from {0}', needs.check-changes.outputs.langtags_url) || '⏭️ Skipped' }}
            - ianaSubtagRegistry.txt: ${{ needs.check-changes.outputs.has_iana_changes == 'true' && format('✅ Updated from {0}', env.IANA_URL) || '⏭️ Skipped' }}

            **Next Steps:**
            - Review the changes
            - Run any additional manual tests if needed
            - Merge when ready
          branch: update-writing-system-data
          delete-branch: true

      # Runs even after a failed step, so the outcome is taken from the job
      # status instead of assuming that every earlier step succeeded.
      - name: Create summary
        if: always()
        shell: bash
        run: |
          echo "## Writing System Data Update Summary" >> "$GITHUB_STEP_SUMMARY"
          echo "- **SLDR Staging Mode**: ${{ github.event.inputs.use_staging }}" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Changes Detected**: ${{ needs.check-changes.outputs.has_changes }}" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Update langtags.json if newer**: ${{ github.event.inputs.update_langtags }}" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Update ianaSubtagRegistry.txt if newer**: ${{ github.event.inputs.update_iana }}" >> "$GITHUB_STEP_SUMMARY"

          if [[ "${{ job.status }}" == "success" ]]; then
            echo "- **Action Taken**: Files updated, tests run, PR created" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "- **Action Taken**: Update did not complete (job status: ${{ job.status }})" >> "$GITHUB_STEP_SUMMARY"
          fi

SIL.WritingSystems/Readme.md

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,31 @@
1-
## SIL.WritingSystems Library
1+
# SIL.WritingSystems Library
22

33
This library contains many classes that make working with writing systems and language tags easier
44

5-
### Updating langtags.json
5+
## SIL Locale Data Repository
66

7-
To update langtags.json to the latest follow the following steps:
7+
Much of the writing system data that this library provides comes from the [SIL Locale Data repository (SLDR)](https://github.com/silnrsi/sldr?tab=readme-ov-file#sil-locale-data-repository-sldr).
8+
To test with updated SLDR data from the staging area, set the following environment variable:
9+
10+
`SLDR_USE_STAGING=true`
11+
12+
## Updating embedded writing system and language data
13+
14+
There is a GitHub Actions workflow that can be run to update the `langtags.json` and `ianaSubtagRegistry.txt` files, which are embedded in the library.
15+
It downloads the latest version of each file and, after a successful run of the WritingSystems tests, creates a PR that updates whichever files have changed.
16+
17+
### langtags.json
18+
19+
The list of language tag identifiers is curated by the Writing Systems Technology group and provided in a `langtags.json` file.
20+
The copy embedded in this library is used as the final fallback in case of problems with the data served from https://ldml.api.sil.org/langtags.json
21+
22+
To manually update langtags.json to the latest version, follow these steps:
823

9-
1. Run the unit test suite by hand and note (or fix) any failures to ByHand and SkipOnTeamCity category tests
1024
1. Replace `Resources\langtags.json` with the content from https://ldml.api.sil.org/langtags.json
1125
1. Run the unit test suite by hand and fix any tests that relied on old langtags data
1226
1. Commit the changes
1327

14-
### Updating ianaSubtagRegistry.txt
15-
To update ianaSubtagRegistry.txt to the latest, replace `Resources\ianaSubtagRegistry.txt` with
28+
### ianaSubtagRegistry.txt
29+
To manually update ianaSubtagRegistry.txt to the latest, replace `Resources\ianaSubtagRegistry.txt` with
1630
the content from https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
1731

0 commit comments

Comments
 (0)