Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
272 changes: 272 additions & 0 deletions .github/workflows/update-language-data.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
name: Update Embedded Writing System Data
permissions:
contents: write
pull-requests: write
on:
workflow_dispatch:
inputs:
use_staging:
description: 'Use SLDR staging data for testing'
required: false
default: false
type: boolean
update_langtags:
description: 'Update langtags.json'
required: false
default: true
type: boolean
update_iana:
description: 'Update ianaSubtagRegistry.txt'
required: false
default: true
type: boolean

env:
LANGTAGS_PRODUCTION_URL: 'https://ldml.api.sil.org/index.html?query=langtags&ext=json'
LANGTAGS_STAGING_URL: 'https://ldml.api.sil.org/index.html?query=langtags&ext=json&staging=1'
IANA_URL: 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry'

jobs:
check-changes:
runs-on: ubuntu-latest
outputs:
has_changes: ${{ steps.changes.outputs.has_changes }}
has_langtags_changes: ${{ steps.changes.outputs.has_langtags_changes }}
has_iana_changes: ${{ steps.changes.outputs.has_iana_changes }}
langtags_url: ${{ steps.changes.outputs.langtags_url }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0

- name: Download and check for changes
id: changes
shell: bash
run: |
has_changes=false
has_langtags_changes=false
has_iana_changes=false
langtags_url=""

if [[ "${{ github.event.inputs.update_langtags }}" == "true" ]]; then
if [[ "${{ github.event.inputs.use_staging }}" == "true" ]]; then
url="$LANGTAGS_STAGING_URL"
else
url="$LANGTAGS_PRODUCTION_URL"
fi
langtags_url="$url"
echo "Downloading langtags from: $url"
curl -f -o "langtags.json.new" "$url"

# Validate JSON format
if ! jq empty "langtags.json.new" 2>/dev/null; then
echo "Error: Downloaded langtags file is not valid JSON"
exit 1
fi

if ! cmp -s "SIL.WritingSystems/Resources/langtags.json" "langtags.json.new"; then
echo "Changes detected in langtags.json"
has_changes=true
has_langtags_changes=true
fi
fi

if [[ "${{ github.event.inputs.update_iana }}" == "true" ]]; then
echo "Downloading IANA registry from: $IANA_URL"
curl -f -o "ianaSubtagRegistry.txt.new" "$IANA_URL"
if ! cmp -s "SIL.WritingSystems/Resources/ianaSubtagRegistry.txt" "ianaSubtagRegistry.txt.new"; then
echo "Changes detected in ianaSubtagRegistry.txt"
has_changes=true
has_iana_changes=true
fi
fi

echo "has_changes=$has_changes" >> $GITHUB_OUTPUT
echo "has_langtags_changes=$has_langtags_changes" >> $GITHUB_OUTPUT
echo "has_iana_changes=$has_iana_changes" >> $GITHUB_OUTPUT
echo "langtags_url=$langtags_url" >> $GITHUB_OUTPUT

update-changelog:
needs: check-changes
runs-on: ubuntu-latest
if: needs.check-changes.outputs.has_changes == 'true'
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0

- name: Build changelog entry text
id: changelog_entry
shell: bash
run: |
entry_text=""
if [ "${{ needs.check-changes.outputs.has_langtags_changes }}" == "true" ]; then
entry_text="[SIL.WritingSystems] Updated embedded langtags.json"
fi
if [ "${{ needs.check-changes.outputs.has_iana_changes }}" == "true" ]; then
if [ -n "$entry_text" ]; then
entry_text="$entry_text"$'\n'"[SIL.WritingSystems] Updated embedded ianaSubtagRegistry.txt"
else
entry_text="[SIL.WritingSystems] Updated embedded ianaSubtagRegistry.txt"
fi
fi
echo "text<<EOF" >> $GITHUB_OUTPUT
echo "$entry_text" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT

- name: Add langtags entry to CHANGELOG.md
if: needs.check-changes.outputs.has_langtags_changes
uses: claudiodekker/changelog-updater@6d9e21971591cfd515ef8cc71b721b767794afd4
with:
section: "Changed"
entry-text: "[SIL.WritingSystems] Updated embedded langtags.json"

- name: Add iana subtags entry to CHANGELOG.md
if: needs.check-changes.outputs.has_iana_changes
uses: claudiodekker/changelog-updater@6d9e21971591cfd515ef8cc71b721b767794afd4
with:
section: "Changed"
entry-text: "[SIL.WritingSystems] Updated embedded ianaSubtagRegistry.txt"

- name: Upload updated changelog
uses: actions/upload-artifact@v4
with:
name: updated-changelog
path: CHANGELOG.md
retention-days: 1

update-langtags:
needs: [check-changes, update-changelog]
runs-on: windows-latest
if: needs.check-changes.outputs.has_changes == 'true'
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0

- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
dotnet-version: '8.0.x'

- name: Download latest langtags.json
if: github.event.inputs.update_langtags == 'true' && needs.check-changes.outputs.has_langtags_changes == 'true'
id: langtags
shell: bash
run: |
if [[ "${{ github.event.inputs.use_staging }}" == "true" ]]; then
url="$LANGTAGS_STAGING_URL"
else
url="$LANGTAGS_PRODUCTION_URL"
fi
echo "Downloading from: $url"
curl -f -o "langtags.json.new" "$url"

# Validate JSON format
if ! jq empty "langtags.json.new" 2>/dev/null; then
echo "Error: Downloaded file is not valid JSON"
exit 1
fi

mv "langtags.json.new" "SIL.WritingSystems/Resources/langtags.json"
echo "Updated langtags.json"
echo "url=$url" >> $GITHUB_OUTPUT

- name: Download latest iana language-subtag-registry
if: github.event.inputs.update_iana == 'true' && needs.check-changes.outputs.has_iana_changes == 'true'
id: subtagregistry
shell: bash
run: |
echo "Downloading from: ${{ env.IANA_URL }}"
curl -f -o "ianaSubtagRegistry.txt.new" "$IANA_URL"
mv "ianaSubtagRegistry.txt.new" "SIL.WritingSystems/Resources/ianaSubtagRegistry.txt"
echo "Updated ianaSubtagRegistry.txt"

- name: Restore & Build SIL.WritingSystems.Tests
shell: bash
run: |
dotnet build SIL.WritingSystems.Tests -p:TargetFramework=net8.0 -p:Configuration=Release
echo "TEST_PATH=$(dotnet msbuild SIL.WritingSystems.Tests/ --getProperty:OutputPath -p:TargetFramework=net8.0 -p:Configuration=Release)" >> $GITHUB_ENV

- name: Test SIL.WritingSystems.Tests
shell: bash
run: |
echo "## Test Summary" >> $GITHUB_STEP_SUMMARY
echo "Executing SIL.WritingSystems.Tests..." >> $GITHUB_STEP_SUMMARY

dotnet test "$TEST_PATH"/SIL.WritingSystems.Tests.dll \
--no-build \
--filter "TestCategory!=SkipOnTeamCity" \
--blame-hang-timeout 5m \
--logger:"trx;LogFilePrefix=results" \
--results-directory ./test-results

EXIT_CODE=$?
if [ $EXIT_CODE -eq 0 ]; then
echo "✅ Tests passed." >> $GITHUB_STEP_SUMMARY
else
echo "❌ Tests failed." >> $GITHUB_STEP_SUMMARY
exit $EXIT_CODE
fi

- name: Download updated changelog
uses: actions/download-artifact@v4
with:
name: updated-changelog
path: .

- name: Create Pull Request
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e
with:
token: ${{ secrets.GITHUB_TOKEN }}
add-paths: |
SIL.WritingSystems/Resources/langtags.json
SIL.WritingSystems/Resources/ianaSubtagRegistry.txt
CHANGELOG.md
commit-message: |
Update embedded writing system data

- Updated: $(date -u '+%Y-%m-%d %H:%M:%S UTC')
- langtags.json: ${{ needs.check-changes.outputs.has_langtags_changes == 'true' && 'Updated' || 'No changes' }}
- ianaSubtagRegistry.txt: ${{ needs.check-changes.outputs.has_iana_changes == 'true' && 'Updated' || 'No changes' }}
- SLDR staging: ${{ github.event.inputs.use_staging }}
title: 'Update embedded writing system data'
body: |
## Automated Writing System Data Update

This PR updates the embedded writing system data files as described in `SIL.WritingSystems/Readme.md`.

**Workflow Run:** [View Summary](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})

**Files Updated:**
- langtags.json: ${{ needs.check-changes.outputs.has_langtags_changes == 'true' && format('✅ Updated from {0}', needs.check-changes.outputs.langtags_url) || '⏭️ Skipped' }}
- ianaSubtagRegistry.txt: ${{ needs.check-changes.outputs.has_iana_changes == 'true' && format('✅ Updated from {0}', env.IANA_URL) || '⏭️ Skipped' }}

**Next Steps:**
- Review the changes
- Run any additional manual tests if needed
- Merge when ready
branch: update-writing-system-data
delete-branch: true

- name: Create summary
if: always()
shell: bash
run: |
echo "## Writing System Data Update Summary" >> $GITHUB_STEP_SUMMARY
echo "- **SLDR Staging Mode**: ${{ github.event.inputs.use_staging }}" >> $GITHUB_STEP_SUMMARY
echo "- **Changes Detected**: ${{ needs.check-changes.outputs.has_changes }}" >> $GITHUB_STEP_SUMMARY
echo "- **Update langtags.json if newer**: ${{ github.event.inputs.update_langtags }}" >> $GITHUB_STEP_SUMMARY
echo "- **Update ianaSubtagRegistry.txt if newer**: ${{ github.event.inputs.update_iana }}" >> $GITHUB_STEP_SUMMARY

if [[ "${{ needs.check-changes.outputs.has_changes }}" == "true" ]]; then
echo "- **Action Taken**: Files updated, tests run, PR created" >> $GITHUB_STEP_SUMMARY
else
echo "- **Action Taken**: No changes detected, no updates needed" >> $GITHUB_STEP_SUMMARY
fi
7 changes: 2 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

<!-- Available types of changes:
### Added
### Changed
### Fixed
### Changed
### Deprecated
### Removed
### Security
Expand Down Expand Up @@ -185,6 +185,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- [SIL.Media] Made MediaInfo.FFprobeFolder look for and return the folder when first accessed, even if no prior call to the setter or other action had caused it t be found.
- [SIL.Core] Made GetSafeDirectories not crash and simply not return any subdirectory the user does not have permission to access.
- [SIL.Core] In GetDirectoryDistributedWithApplication, prevented a failure in accessing one of the specified subfolders from allowing it to try the others.
- [SIL.Window.Forms] When choosing a file in the ImageToolbox.AcquireImageControl, a FileOk handler is simulated that verifies the selected file passes the given filter. Users can defeat the filter mechanism by pasting or typing the file name. While the returned filename does not pass the filter, the dialog is reopened until the user either chooses a proper filename or cancels the dialog. The native FileOk handler can prevent the dialog from closing: we can't achieve that. (See BL-13552.)

### Removed

Expand All @@ -199,10 +200,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- [SIL.Archiving] Removed public methods CreateMetsFile and CreateRampPackage from RampArchivingDlgViewModel (made internal).
- [SIL.Archiving] Removed ArchivingPackage and AddSession from ArchivingDlgViewModel and RampArchivingDlgViewModel (where they threw NotImplementedExceptions)

### Fixed

- [SIL.Window.Forms] When choosing a file in the ImageToolbox.AcquireImageControl, a FileOk handler is simulated that verifies the selected file passes the given filter. Users can defeat the filter mechanism by pasting or typing the file name. While the returned filename does not pass the filter, the dialog is reopened until the user either chooses a proper filename or cancels the dialog. The native FileOk handler can prevent the dialog from closing: we can't achieve that. (See BL-13552.)

## [14.1.1] - 2024-05-23

### Fixed
Expand Down
26 changes: 20 additions & 6 deletions SIL.WritingSystems/Readme.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
## SIL.WritingSystems Library
# SIL.WritingSystems Library

This library contains many classes that make working with writing systems and language tags easier

### Updating langtags.json
## SIL Locale Data Repository

To update langtags.json to the latest follow the following steps:
Much of the writing system data that this library provides comes from the [SIL Locale Data Repository (SLDR)](https://github.com/silnrsi/sldr?tab=readme-ov-file#sil-locale-data-repository-sldr).
To test with updated SLDR data from the staging area you can set an environment variable

`SLDR_USE_STAGING=true`

## Updating embedded writing system and language data

There is a github action that can be run to update the `langtags.json` and `ianaSubtagRegistry.txt` which are embedded in the library.
It will download the latest and, after a successful run of the WritingSystems tests, create a PR to update both files.

### langtags.json

The list of language tag identifiers is curated by the Writing Systems Technology group and provided in a `langtags.json` file.
This library is used as the final fallback in case of problems with the data served from https://ldml.api.sil.org/langtags.json

To manually update langtags.json to the latest, follow the following steps:

1. Run the unit test suite by hand and note (or fix) any failures to ByHand and SkipOnTeamCity category tests
1. Replace `Resources\langtags.json` with the content from https://ldml.api.sil.org/langtags.json
1. Run the unit test suite by hand and fix any tests that relied on old langtags data
1. Commit the changes

### Updating ianaSubtagRegistry.txt
To update ianaSubtagRegistry.txt to the latest, replace `Resources\ianaSubtagRegistry.txt` with
### ianaSubtagRegistry.txt
To manually update ianaSubtagRegistry.txt to the latest, replace `Resources\ianaSubtagRegistry.txt` with
the content from https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry

Loading