Skip to content

Commit 728d554

Browse files
authored
Merge pull request #135 from openzim/add-chinese
Add support for simplified and traditional Chinese
2 parents 8b73ead + 4a60d00 commit 728d554

File tree

6 files changed

+61
-30
lines changed

6 files changed

+61
-30
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- Support for Simplified Chinese (`zh-hans`) and Traditional Chinese (`zh-hant`) (#135)
13+
1014
## [2.0.2] - 2025-11-14
1115

1216
### Changed

offliner-definition.json

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,39 +13,47 @@
1313
"type": "string-enum",
1414
"required": true,
1515
"title": "Language",
16-
"description": "Language of zim file and curriculum.",
16+
"description": "Language of curriculum.",
1717
"choices": [
1818
{
19-
"title": "ENGLISH",
19+
"title": "English",
2020
"value": "eng"
2121
},
2222
{
23-
"title": "ESPANOL",
23+
"title": "Spanish",
2424
"value": "spa"
2525
},
2626
{
27-
"title": "GERMAN",
27+
"title": "German",
2828
"value": "deu"
2929
},
3030
{
31-
"title": "ITALIAN",
31+
"title": "Italian",
3232
"value": "ita"
3333
},
3434
{
35-
"title": "JAPANESE",
35+
"title": "Japanese",
3636
"value": "jpn"
3737
},
3838
{
39-
"title": "PORTOGUESE",
39+
"title": "Portuguese",
4040
"value": "por"
4141
},
4242
{
43-
"title": "UKRAINIAN",
43+
"title": "Ukrainian",
4444
"value": "ukr"
4545
},
4646
{
47-
"title": "SWAHILI",
47+
"title": "Swahili",
4848
"value": "swa"
49+
},
50+
{
51+
"title": "Chinese Simplified",
52+
"value": "zh-hans"
53+
},
54+
{
55+
"title": "Chinese Traditional",
56+
"value": "zh-hant"
4957
}
5058
]
5159
},

scraper/src/fcc2zim/constants.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,11 @@
66
VERSION = __version__
77
ROOT_DIR = pathlib.Path(__file__).parent
88

9-
# key is the language passed at CLI (and used as ZIM Language metadata)
9+
# key is the language passed at CLI
1010
# value is the name of folders used in FCC source code
1111
FCC_LANG_MAP = {
12-
# removed until we settle on these two codes
13-
# "cmn": "chinese",
14-
# "lzh": "chinese-traditional",
12+
"zh-hans": "chinese",
13+
"zh-hant": "chinese-traditional",
1514
"eng": "english",
1615
"spa": "espanol",
1716
"deu": "german",
@@ -21,3 +20,16 @@
2120
"ukr": "ukrainian",
2221
"swa": "swahili",
2322
}
23+
24+
ZIM_LANG_MAP = {
25+
"zh-hans": "zho",
26+
"zh-hant": "zho",
27+
"eng": "eng",
28+
"spa": "spa",
29+
"deu": "deu",
30+
"ita": "ita",
31+
"jpn": "jpn",
32+
"por": "por",
33+
"ukr": "ukr",
34+
"swa": "swa",
35+
}

scraper/src/fcc2zim/context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class Context:
2626
# list of courses to include
2727
course: list[str]
2828

29-
# ZIM language, also used to fetch proper FCC content
29+
# language passed at CLI; used to fetch the matching FCC content and to compute the ZIM Language metadata
3030
language: str
3131

3232
# ZIM name (also used for filename if zim_file is not set) and filename

scraper/src/fcc2zim/scraper.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from zimscraperlib.zim import Creator, metadata
1010

1111
from fcc2zim.build import build_command
12-
from fcc2zim.constants import FCC_LANG_MAP, VERSION
12+
from fcc2zim.constants import FCC_LANG_MAP, VERSION, ZIM_LANG_MAP
1313
from fcc2zim.context import Context
1414
from fcc2zim.fetch import fetch_command
1515
from fcc2zim.prebuild import prebuild_command
@@ -41,6 +41,10 @@ def __init__(self):
4141
raise ValueError(f"Unsupported language {context.language}")
4242
self.fcc_lang = FCC_LANG_MAP[context.language]
4343

44+
if context.language not in ZIM_LANG_MAP:
45+
raise ValueError(f"Unsupported language {context.language}")
46+
self.zim_lang = ZIM_LANG_MAP[context.language]
47+
4448
context.description, context.long_description = compute_descriptions(
4549
context.description, context.description, context.long_description
4650
)
@@ -118,7 +122,7 @@ def __init__(self):
118122
self.creator = Creator(self.zim_path, "index.html").config_metadata(
119123
std_metadata=metadata.StandardMetadataList(
120124
Name=metadata.NameMetadata(context.name),
121-
Language=metadata.LanguageMetadata(context.language),
125+
Language=metadata.LanguageMetadata(self.zim_lang),
122126
Title=metadata.TitleMetadata(context.title),
123127
Creator=metadata.CreatorMetadata(context.creator),
124128
Publisher=metadata.PublisherMetadata(context.publisher),

scraper/tests/test_scraper.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -167,27 +167,30 @@ def test_zimui_dist_ko(self):
167167
self.create_scraper(zimui_dist=Path("whatever"))
168168

169169
@pytest.mark.parametrize(
170-
"language, expected_fcc_lang",
170+
"language, expected_fcc_lang, expected_zim_lang",
171171
[
172-
pytest.param("eng", "english", id="english"),
173-
pytest.param("eng", "english", id="english"),
174-
# pytest.param("cmn", "chinese", id="chinese"),
175-
# pytest.param("lzh", "chinese-traditional", id="chinese-traditional"),
176-
pytest.param("eng", "english", id="english"),
177-
pytest.param("spa", "espanol", id="espanol"),
178-
pytest.param("deu", "german", id="german"),
179-
pytest.param("ita", "italian", id="italian"),
180-
pytest.param("jpn", "japanese", id="japanese"),
181-
pytest.param("por", "portuguese", id="portuguese"),
182-
pytest.param("ukr", "ukrainian", id="ukrainian"),
183-
pytest.param("swa", "swahili", id="swahili"),
172+
pytest.param("zh-hans", "chinese", "zho", id="chinese"),
173+
pytest.param(
174+
"zh-hant", "chinese-traditional", "zho", id="chinese-traditional"
175+
),
176+
pytest.param("eng", "english", "eng", id="english"),
177+
pytest.param("spa", "espanol", "spa", id="espanol"),
178+
pytest.param("deu", "german", "deu", id="german"),
179+
pytest.param("ita", "italian", "ita", id="italian"),
180+
pytest.param("jpn", "japanese", "jpn", id="japanese"),
181+
pytest.param("por", "portuguese", "por", id="portuguese"),
182+
pytest.param("ukr", "ukrainian", "ukr", id="ukrainian"),
183+
pytest.param("swa", "swahili", "swa", id="swahili"),
184184
],
185185
)
186-
def test_fcc_lang_ok(self, language: str, expected_fcc_lang: str):
186+
def test_fcc_lang_ok(
187+
self, language: str, expected_fcc_lang: str, expected_zim_lang: str
188+
):
187189
scraper = self.create_scraper(language=language)
188190
context = Context.get()
189191
assert context.language == language
190192
assert scraper.fcc_lang == expected_fcc_lang
193+
assert scraper.zim_lang == expected_zim_lang
191194

192195
def test_language_ko(self):
193196
with pytest.raises(ValueError):

0 commit comments

Comments
 (0)