Skip to content

Commit 4e485d8

Browse files
authored
Fix translation of file types in CWS (#1475)
Previously, we tried to find shared-mime-info.mo and use that to translate the descriptions obtained from xdg.Mime into the right language. This was not the correct approach: first, the path to shared-mime-info.mo varied (on Ubuntu it was in locale-langpack instead of locale), and second, sometimes (e.g. on Fedora) it was not present at all. The right place to find these translations is in the mimetype XML files themselves. However, reading them from there caused other issues. The first problem was that pyxdg does not have proper support for using more than one language. I hacked around this by setting the (global) language before each call and clearing pyxdg's own cache, and then adding my own cache on top that can cache multiple languages. The second problem was that on Debian/Ubuntu, the xml:lang tags that specify the language codes for each string use an incorrect format: they use POSIX-style locale names (like zh_CN) instead of XML-compliant BCP47 language codes (like zh-Hans-CN). pyxdg only knows how to handle these incorrect names, so I had to implement a hack to get it to recognize the correct names too. Also deleted the undocumented and now-unused shared_mime_info_prefix config option, and mentioned shared-mime-info as a dependency in the installation docs.
1 parent 5f111c5 commit 4e485d8

File tree

8 files changed

+44
-36
lines changed

8 files changed

+44
-36
lines changed

cms/conf.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,6 @@ def __init__(self):
140140
self.max_input_length = 5_000_000 # 5 MB
141141
self.stl_path = "/usr/share/cppreference/doc/html/"
142142
self.docs_path = None
143-
# Prefix of 'shared-mime-info'[1] installation. It can be found
144-
# out using `pkg-config --variable=prefix shared-mime-info`, but
145-
# it's almost universally the same (i.e. '/usr') so it's hardly
146-
# necessary to change it.
147-
# [1] http://freedesktop.org/wiki/Software/shared-mime-info
148-
self.shared_mime_info_prefix = "/usr"
149143
self.contest_admin_token = None
150144

151145
# AdminWebServer.

cms/locale/locale.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import copy
3434
import logging
3535
import math
36-
import os
3736

3837
import babel.core
3938
import babel.dates
@@ -43,8 +42,8 @@
4342
import babel.units
4443
import importlib.resources
4544

46-
from cms import config
4745
from cmscommon.datetime import utc
46+
from cmscommon.mimetypes import get_name_for_type
4847
from datetime import datetime, tzinfo, timedelta
4948

5049

@@ -70,9 +69,6 @@ def __init__(self, lang_code, mofile=None):
7069
self.translation = babel.support.Translations(mofile, domain="cms")
7170
else:
7271
self.translation = babel.support.NullTranslations()
73-
self.mimetype_translation = babel.support.Translations.load(
74-
os.path.join(config.shared_mime_info_prefix, "share", "locale"),
75-
[self.locale], "shared-mime-info")
7672

7773
@property
7874
def identifier(self) -> str:
@@ -266,7 +262,11 @@ def format_locale(self, code: str) -> str:
266262
return code
267263

268264
def translate_mimetype(self, mimetype: str) -> str:
269-
return self.mimetype_translation.gettext(mimetype)
265+
lang_code = self.identifier
266+
alt_lang_code = babel.core.get_locale_identifier(
267+
(self.locale.language, self.locale.territory), sep="_"
268+
)
269+
return get_name_for_type(mimetype, lang_code, alt_lang_code)
270270

271271

272272
DEFAULT_TRANSLATION = Translation("en")

cms/server/contest/templates/task_description.html

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,8 @@ <h2>{% trans %}Attachments{% endtrans %}</h2>
177177
{% for filename, attachment in task.attachments|dictsort(by="key") %}
178178
{% set mime_type = get_mimetype_for_file_name(filename) %}
179179
{% if mime_type is not none %}
180-
{% set type_name = get_name_for_mimetype(mime_type) %}
181180
{% set type_icon = get_icon_for_mimetype(mime_type) %}
182181
{% else %}
183-
{% set type_name = none %}
184182
{% set type_icon = none %}
185183
{% endif %}
186184
{% set file_size = handler.application.service.file_cacher.get_size(attachment.digest) %}
@@ -195,8 +193,8 @@ <h2>{% trans %}Attachments{% endtrans %}</h2>
195193
<span class="name">{{ filename }}</span>
196194
<span class="size">{{ file_size|format_size }}</span>
197195
</span>
198-
{% if type_name is not none %}
199-
<span class="type">{{ translation.translate_mimetype(type_name) }}</span>
196+
{% if mime_type is not none %}
197+
<span class="type">{{ translation.translate_mimetype(mime_type) }}</span>
200198
{% endif %}
201199
</a>
202200
</li>

cms/server/jinja2_toolbox.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
from cmscommon.constants import \
4343
SCORE_MODE_MAX, SCORE_MODE_MAX_SUBTASK, SCORE_MODE_MAX_TOKENED_LAST
4444
from cmscommon.datetime import make_datetime, make_timestamp, utc, local_tz
45-
from cmscommon.mimetypes import get_type_for_file_name, get_name_for_type, \
46-
get_icon_for_type
45+
from cmscommon.mimetypes import get_type_for_file_name, get_icon_for_type
4746

4847

4948
@contextfilter
@@ -211,7 +210,6 @@ def instrument_cms_toolbox(env: Environment):
211210
env.globals["get_score_type"] = safe_get_score_type
212211

213212
env.globals["get_mimetype_for_file_name"] = get_type_for_file_name
214-
env.globals["get_name_for_mimetype"] = get_name_for_type
215213
env.globals["get_icon_for_mimetype"] = get_icon_for_type
216214

217215
env.filters["to_language"] = get_language

cmscommon/mimetypes.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616
# You should have received a copy of the GNU Affero General Public License
1717
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1818

19+
import functools
1920
import os.path
2021

2122
import xdg.BaseDirectory
2223
import xdg.Mime
24+
import xdg.Locale
2325

2426

2527
__all__ = [
@@ -59,17 +61,36 @@ def get_icon_for_type(typename: str) -> str:
5961
return _icons[typename]
6062
return mimetype.media + "-x-generic"
6163

62-
63-
def get_name_for_type(typename: str) -> str:
64+
# xdg.Mime is by default memoized, but since we need to change the language, we
65+
# need to wipe the cache to load the correct language. So use our own caching
66+
# on top of it.
67+
@functools.cache
68+
def get_name_for_type(typename: str, language: str, alt_language: str) -> str:
6469
"""Get the natural language description of the MIME type.
6570
6671
typename: a MIME type, e.g., "application/pdf".
72+
language: the BCP47 code of the language for which to return the result.
73+
alt_language: underscore-separated form of the language code, to work
74+
around incorrect behavior in pyxdg.
6775
6876
return: the human-readable description (also called comment)
6977
of the given MIME type, e.g., "PDF document".
7078
7179
"""
80+
# pyxdg expects the locale field to be provided as a posix-style locale
81+
# name, e.g. zh_CN. It assumes this in both the provided language name, and
82+
# in the xml:lang attribute of the mimetype xml files. Some distributions
83+
# instead use BCP47 language codes, e.g. zh-Hans-CN, in the mimetype xml
84+
# files (which is semantically more correct, as this is mandated by the xml
85+
# spec).
86+
# First parse the language from the posix format.
87+
xdg.Locale.update(alt_language)
88+
# Then, we make pyxdg think the BCP47 code is another variant of the
89+
# current language name.
90+
xdg.Locale.langs += [language]
7291
mimetype = xdg.Mime.lookup(typename).canonical()
92+
# Force reloading the comment, because the language might have changed.
93+
mimetype._comment = None
7394
return mimetype.get_comment()
7495

7596

cmstestsuite/unit_tests/cmscommon/mimetypes_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ def test_generic(self):
4242
class TestGetNameForType(unittest.TestCase):
4343

4444
def test_basic(self):
45-
self.assertEqual(get_name_for_type("application/pdf"),
45+
self.assertEqual(get_name_for_type("application/pdf", "en", "en"),
4646
"PDF document")
4747

4848
def test_alias(self):
49-
self.assertEqual(get_name_for_type("text/x-octave"),
49+
self.assertEqual(get_name_for_type("text/x-octave", "en", "en"),
5050
"MATLAB file")
5151

5252

cmstestsuite/unit_tests/locale/locale_test.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
ITALIAN = Translation("it")
5454
DANISH = Translation("da")
5555
CHINESE = Translation("zh_CN")
56+
CHINESE_TRADITIONAL = Translation("zh_TW")
5657

5758

5859
class TestIdentifier(unittest.TestCase):
@@ -591,20 +592,15 @@ def test_localized_decimal_and_thousands_separators(self):
591592

592593
class TestTranslateMimetype(unittest.TestCase):
593594

594-
@unittest.skipIf(not os.path.isfile(
595-
"/usr/share/locale/it/LC_MESSAGES/shared-mime-info.mo"),
596-
reason="need Italian shared-mime-info translation")
597595
def test_translate_mimetype(self):
598-
self.assertEqual(ENGLISH.translate_mimetype("PDF document"),
596+
self.assertEqual(ENGLISH.translate_mimetype("application/pdf"),
599597
"PDF document")
600-
self.assertEqual(ITALIAN.translate_mimetype("PDF document"),
598+
self.assertEqual(ITALIAN.translate_mimetype("application/pdf"),
601599
"Documento PDF")
602-
603-
def test_graceful_failure(self):
604-
self.assertEqual(ENGLISH.translate_mimetype("Not a MIME type"),
605-
"Not a MIME type")
606-
self.assertEqual(ITALIAN.translate_mimetype("Not a MIME type"),
607-
"Not a MIME type")
600+
self.assertEqual(CHINESE.translate_mimetype("application/pdf"),
601+
"PDF 文档")
602+
self.assertEqual(CHINESE_TRADITIONAL.translate_mimetype("application/pdf"),
603+
"PDF 文件")
608604

609605

610606
class TestFilterLanguageCodes(unittest.TestCase):

docs/Installation.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ On Ubuntu 24.04, one will need to run the following script as root to satisfy al
7070
postgresql postgresql-client \
7171
python3.12 python3.12-dev python3-pip python3-venv \
7272
libpq-dev libcups2-dev libyaml-dev libffi-dev \
73-
cppreference-doc-en-html zip curl
73+
shared-mime-info cppreference-doc-en-html zip curl
7474

7575
# Isolate from upstream package repository
7676
echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/isolate.asc] http://www.ucw.cz/isolate/debian/ noble-isolate main' >/etc/apt/sources.list.d/isolate.list
@@ -92,7 +92,8 @@ On Arch Linux, run the following commands as root to install almost all dependen
9292
.. sourcecode:: bash
9393

9494
pacman -S base-devel jdk8-openjdk fpc postgresql postgresql-client \
95-
python python-pip postgresql-libs libcups libyaml
95+
python python-pip postgresql-libs libcups libyaml \
96+
shared-mime-info
9697

9798
# Install the following from AUR.
9899
# https://aur.archlinux.org/packages/cppreference/

0 commit comments

Comments
 (0)