Skip to content

Commit 272bf5c

Browse files
Add a github workflow for updating language data
* Also update the Readme
1 parent 8cefa89 commit 272bf5c

File tree

3 files changed

+294
-7
lines changed

3 files changed

+294
-7
lines changed
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
# Manually dispatched workflow that refreshes the data files kept under
# SIL.WritingSystems/Resources (langtags.json and ianaSubtagRegistry.txt).
name: Update Embedded Writing System Data

# Only started by hand; each input is a checkbox on the dispatch form.
on:
  workflow_dispatch:
    inputs:
      use_staging:
        type: boolean
        default: false
        required: false
        description: 'Use SLDR staging data for testing'
      update_langtags:
        type: boolean
        default: true
        required: false
        description: 'Update langtags.json'
      update_iana:
        type: boolean
        default: true
        required: false
        description: 'Update ianaSubtagRegistry.txt'

# Token scopes requested for the jobs in this workflow.
permissions:
  contents: write
  pull-requests: write

# Download locations shared by every job.
env:
  IANA_URL: 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry'
  LANGTAGS_PRODUCTION_URL: 'https://ldml.api.sil.org/index.html?query=langtags&ext=json'
  LANGTAGS_STAGING_URL: 'https://ldml.api.sil.org/index.html?query=langtags&ext=json&staging=1'
28+
29+
jobs:
30+
# Job (an entry of the workflow's `jobs:` mapping).
# Downloads the requested data files and reports, through job outputs, whether
# they differ from the copies committed under SIL.WritingSystems/Resources.
#
# Outputs (all strings):
#   has_changes          - 'true' when at least one file differs
#   has_langtags_changes - 'true' when langtags.json differs
#   has_iana_changes     - 'true' when ianaSubtagRegistry.txt differs
#   langtags_url         - URL langtags.json was fetched from ('' if skipped)
check-changes:
  runs-on: ubuntu-latest
  outputs:
    has_changes: ${{ steps.changes.outputs.has_changes }}
    has_langtags_changes: ${{ steps.changes.outputs.has_langtags_changes }}
    has_iana_changes: ${{ steps.changes.outputs.has_iana_changes }}
    langtags_url: ${{ steps.changes.outputs.langtags_url }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GITHUB_TOKEN }}
        fetch-depth: 0

    - name: Download and check for changes
      id: changes
      shell: bash
      env:
        # Workflow inputs are handed over as environment variables instead of
        # being interpolated into the script text.
        UPDATE_LANGTAGS: ${{ github.event.inputs.update_langtags }}
        UPDATE_IANA: ${{ github.event.inputs.update_iana }}
        USE_STAGING: ${{ github.event.inputs.use_staging }}
      run: |
        has_changes=false
        has_langtags_changes=false
        has_iana_changes=false
        langtags_url=""

        if [[ "$UPDATE_LANGTAGS" == "true" ]]; then
          if [[ "$USE_STAGING" == "true" ]]; then
            url="$LANGTAGS_STAGING_URL"
          else
            url="$LANGTAGS_PRODUCTION_URL"
          fi
          langtags_url="$url"
          echo "Downloading langtags from: $url"
          # -L follows redirects, --retry rides out transient failures.
          curl -fsSL --retry 3 -o "langtags.json.new" "$url"

          # Validate JSON format. `jq empty` exits 0 on an empty file, so an
          # empty download has to be rejected explicitly.
          if [[ ! -s "langtags.json.new" ]] || ! jq empty "langtags.json.new" 2>/dev/null; then
            echo "Error: Downloaded langtags file is empty or not valid JSON"
            exit 1
          fi

          if ! cmp -s "SIL.WritingSystems/Resources/langtags.json" "langtags.json.new"; then
            echo "Changes detected in langtags.json"
            has_changes=true
            has_langtags_changes=true
          fi
        fi

        if [[ "$UPDATE_IANA" == "true" ]]; then
          echo "Downloading IANA registry from: $IANA_URL"
          curl -fsSL --retry 3 -o "ianaSubtagRegistry.txt.new" "$IANA_URL"

          # An empty file would otherwise be reported as a change.
          if [[ ! -s "ianaSubtagRegistry.txt.new" ]]; then
            echo "Error: Downloaded IANA registry file is empty"
            exit 1
          fi

          if ! cmp -s "SIL.WritingSystems/Resources/ianaSubtagRegistry.txt" "ianaSubtagRegistry.txt.new"; then
            echo "Changes detected in ianaSubtagRegistry.txt"
            has_changes=true
            has_iana_changes=true
          fi
        fi

        {
          echo "has_changes=$has_changes"
          echo "has_langtags_changes=$has_langtags_changes"
          echo "has_iana_changes=$has_iana_changes"
          echo "langtags_url=$langtags_url"
        } >> "$GITHUB_OUTPUT"
90+
91+
# Job (an entry of the workflow's `jobs:` mapping).
# Adds one "Changed" entry to CHANGELOG.md per data file that check-changes
# reported as different, then publishes the edited CHANGELOG.md as the
# `updated-changelog` artifact.
update-changelog:
  needs: check-changes
  runs-on: ubuntu-latest
  if: needs.check-changes.outputs.has_changes == 'true'
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GITHUB_TOKEN }}
        fetch-depth: 0

    # Job outputs are strings, and the string 'false' is truthy in an `if:`
    # expression, so the flag must be compared against 'true' explicitly.
    - name: Add langtags entry to CHANGELOG.md
      if: needs.check-changes.outputs.has_langtags_changes == 'true'
      uses: claudiodekker/changelog-updater@6d9e21971591cfd515ef8cc71b721b767794afd4
      with:
        section: "Changed"
        entry-text: "[SIL.WritingSystems] Updated embedded langtags.json"

    - name: Add iana subtags entry to CHANGELOG.md
      if: needs.check-changes.outputs.has_iana_changes == 'true'
      uses: claudiodekker/changelog-updater@6d9e21971591cfd515ef8cc71b721b767794afd4
      with:
        section: "Changed"
        entry-text: "[SIL.WritingSystems] Updated embedded ianaSubtagRegistry.txt"

    - name: Upload updated changelog
      uses: actions/upload-artifact@v4
      with:
        name: updated-changelog
        path: CHANGELOG.md
        retention-days: 1
141+
142+
# Job (an entry of the workflow's `jobs:` mapping).
# Replaces the data files that check-changes reported as different, builds and
# runs SIL.WritingSystems.Tests against them, pulls in the CHANGELOG.md produced
# by update-changelog and opens a pull request with the result.
update-langtags:
  needs: [check-changes, update-changelog]
  runs-on: windows-latest
  if: needs.check-changes.outputs.has_changes == 'true'
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GITHUB_TOKEN }}
        fetch-depth: 0

    - name: Setup .NET
      uses: actions/setup-dotnet@v4
      with:
        dotnet-version: '8.0.x'

    - name: Download latest langtags.json
      if: github.event.inputs.update_langtags == 'true' && needs.check-changes.outputs.has_langtags_changes == 'true'
      id: langtags
      shell: bash
      env:
        # Passed through the environment instead of being interpolated into
        # the script text.
        USE_STAGING: ${{ github.event.inputs.use_staging }}
      run: |
        if [[ "$USE_STAGING" == "true" ]]; then
          url="$LANGTAGS_STAGING_URL"
        else
          url="$LANGTAGS_PRODUCTION_URL"
        fi
        echo "Downloading from: $url"
        # -L follows redirects, --retry rides out transient failures.
        curl -fsSL --retry 3 -o "langtags.json.new" "$url"

        # Validate JSON format. `jq empty` exits 0 on an empty file, so an
        # empty download has to be rejected explicitly before it can
        # overwrite the embedded copy.
        if [[ ! -s "langtags.json.new" ]] || ! jq empty "langtags.json.new" 2>/dev/null; then
          echo "Error: Downloaded file is empty or not valid JSON"
          exit 1
        fi

        mv "langtags.json.new" "SIL.WritingSystems/Resources/langtags.json"
        echo "Updated langtags.json"
        echo "url=$url" >> "$GITHUB_OUTPUT"

    - name: Download latest iana language-subtag-registry
      if: github.event.inputs.update_iana == 'true' && needs.check-changes.outputs.has_iana_changes == 'true'
      id: subtagregistry
      shell: bash
      run: |
        echo "Downloading from: $IANA_URL"
        curl -fsSL --retry 3 -o "ianaSubtagRegistry.txt.new" "$IANA_URL"

        # Never replace the embedded registry with an empty download.
        if [[ ! -s "ianaSubtagRegistry.txt.new" ]]; then
          echo "Error: Downloaded IANA registry file is empty"
          exit 1
        fi

        mv "ianaSubtagRegistry.txt.new" "SIL.WritingSystems/Resources/ianaSubtagRegistry.txt"
        echo "Updated ianaSubtagRegistry.txt"

    - name: Restore & Build SIL.WritingSystems.Tests
      shell: bash
      run: |
        dotnet build SIL.WritingSystems.Tests -p:TargetFramework=net8.0 -p:Configuration=Release
        # Export the build output directory so the test step can locate the
        # test assembly.
        echo "TEST_PATH=$(dotnet msbuild SIL.WritingSystems.Tests/ --getProperty:OutputPath -p:TargetFramework=net8.0 -p:Configuration=Release)" >> "$GITHUB_ENV"

    - name: Test SIL.WritingSystems.Tests
      shell: bash
      run: |
        echo "## Test Summary" >> "$GITHUB_STEP_SUMMARY"
        echo "Executing SIL.WritingSystems.Tests..." >> "$GITHUB_STEP_SUMMARY"

        # `shell: bash` runs the script with `-e`, so the exit code is
        # captured with `||`; a plain `EXIT_CODE=$?` on the next line would
        # never be reached after a failing test run.
        EXIT_CODE=0
        dotnet test "$TEST_PATH"/SIL.WritingSystems.Tests.dll \
          --no-build \
          --filter "TestCategory!=SkipOnTeamCity" \
          --blame-hang-timeout 5m \
          --logger:"trx;LogFilePrefix=results" \
          --results-directory ./test-results || EXIT_CODE=$?

        if [ "$EXIT_CODE" -eq 0 ]; then
          echo "✅ Tests passed." >> "$GITHUB_STEP_SUMMARY"
        else
          echo "❌ Tests failed." >> "$GITHUB_STEP_SUMMARY"
          exit "$EXIT_CODE"
        fi

    - name: Download updated changelog
      uses: actions/download-artifact@v4
      with:
        name: updated-changelog
        path: .

    # Action inputs are not passed through a shell, so a `$(date ...)` written
    # inside `commit-message` would end up literally in the commit. The
    # timestamp is computed here and referenced as a step output instead.
    - name: Record update timestamp
      id: timestamp
      shell: bash
      run: echo "value=$(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> "$GITHUB_OUTPUT"

    - name: Create Pull Request
      uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e
      with:
        # NOTE(review): `main-copy-for-testing` looks like a leftover from
        # testing this workflow - confirm the intended base branch.
        base: main-copy-for-testing
        token: ${{ secrets.GITHUB_TOKEN }}
        add-paths: |
          SIL.WritingSystems/Resources/langtags.json
          SIL.WritingSystems/Resources/ianaSubtagRegistry.txt
          CHANGELOG.md
        commit-message: |
          Update embedded writing system data

          - Updated: ${{ steps.timestamp.outputs.value }}
          - langtags.json: ${{ needs.check-changes.outputs.has_langtags_changes == 'true' && 'Updated' || 'No changes' }}
          - ianaSubtagRegistry.txt: ${{ needs.check-changes.outputs.has_iana_changes == 'true' && 'Updated' || 'No changes' }}
          - SLDR staging: ${{ github.event.inputs.use_staging }}
        title: 'Update embedded writing system data'
        body: |
          ## Automated Writing System Data Update

          This PR updates the embedded writing system data files as described in `SIL.WritingSystems/Readme.md`.

          **Workflow Run:** [View Summary](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})

          **Files Updated:**
          - langtags.json: ${{ needs.check-changes.outputs.has_langtags_changes == 'true' && format('✅ Updated from {0}', needs.check-changes.outputs.langtags_url) || '⏭️ Skipped' }}
          - ianaSubtagRegistry.txt: ${{ needs.check-changes.outputs.has_iana_changes == 'true' && format('✅ Updated from {0}', env.IANA_URL) || '⏭️ Skipped' }}

          **Next Steps:**
          - Review the changes
          - Run any additional manual tests if needed
          - Merge when ready
        branch: update-writing-system-data
        delete-branch: true

    # Runs even when an earlier step failed, so the run page always carries
    # the chosen inputs.
    - name: Create summary
      if: always()
      shell: bash
      env:
        USE_STAGING: ${{ github.event.inputs.use_staging }}
        UPDATE_LANGTAGS: ${{ github.event.inputs.update_langtags }}
        UPDATE_IANA: ${{ github.event.inputs.update_iana }}
        HAS_CHANGES: ${{ needs.check-changes.outputs.has_changes }}
      run: |
        {
          echo "## Writing System Data Update Summary"
          echo "- **SLDR Staging Mode**: $USE_STAGING"
          echo "- **Changes Detected**: $HAS_CHANGES"
          echo "- **Update langtags.json if newer**: $UPDATE_LANGTAGS"
          echo "- **Update ianaSubtagRegistry.txt if newer**: $UPDATE_IANA"
        } >> "$GITHUB_STEP_SUMMARY"

        if [[ "$HAS_CHANGES" == "true" ]]; then
          echo "- **Action Taken**: Files updated, tests run, PR created" >> "$GITHUB_STEP_SUMMARY"
        else
          echo "- **Action Taken**: No changes detected, no updates needed" >> "$GITHUB_STEP_SUMMARY"
        fi

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
77

88
<!-- Available types of changes:
99
### Added
10-
### Changed
1110
### Fixed
11+
### Changed
1212
### Deprecated
1313
### Removed
1414
### Security

SIL.WritingSystems/Readme.md

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,31 @@
1-
## SIL.WritingSystems Library
1+
# SIL.WritingSystems Library
22

33
This library contains many classes that make working with writing systems and language tags easier
44

5-
### Updating langtags.json
5+
## SIL Locale Data Repository
66

7-
To update langtags.json to the latest follow the following steps:
7+
Much of the writing system data that this library provides comes from the [SIL Locale Data Repository (SLDR)](https://github.com/silnrsi/sldr?tab=readme-ov-file#sil-locale-data-repository-sldr).
8+
To test with updated SLDR data from the staging area you can set an environment variable
9+
10+
`SLDR_USE_STAGING=true`
11+
12+
## Updating embedded writing system and language data
13+
14+
There is a GitHub Actions workflow that can be run to update the `langtags.json` and `ianaSubtagRegistry.txt` files, which are embedded in the library.
15+
It downloads the latest versions and, after a successful run of the WritingSystems tests, creates a PR with the updated files.
16+
17+
### langtags.json
18+
19+
The list of language tag identifiers is curated by the Writing Systems Technology group and provided in a `langtags.json` file.
20+
The copy embedded in this library is used as the final fallback in case of problems with the data served from https://ldml.api.sil.org/langtags.json
21+
22+
To manually update langtags.json to the latest version, follow these steps:
823

9-
1. Run the unit test suite by hand and note (or fix) any failures to ByHand and SkipOnTeamCity category tests
1024
1. Replace `Resources\langtags.json` with the content from https://ldml.api.sil.org/langtags.json
1125
1. Run the unit test suite by hand and fix any tests that relied on old langtags data
1226
1. Commit the changes
1327

14-
### Updating ianaSubtagRegistry.txt
15-
To update ianaSubtagRegistry.txt to the latest, replace `Resources\ianaSubtagRegistry.txt` with
28+
### ianaSubtagRegistry.txt
29+
To manually update ianaSubtagRegistry.txt to the latest, replace `Resources\ianaSubtagRegistry.txt` with
1630
the content from https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
1731

0 commit comments

Comments
 (0)