beeware · johnzhou721 · Jun 28, 2025 · Jun 29, 2025 · Jun 29, 2025 · Jun 29, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## 0.5.5
+
+* POT content is now sorted by path when merging POTs from multiple sources (i.e., templates and content).
+* `xgettext` is used to merge POT files instead of `msgcat`, providing a better header and merging of same strings from different sources.
+* The initially generated PO files will now have a header compatible with GNOME's Translation Editor, since they will have a non-placeholder `Project-Id-Version`. Existing users hitting this problem will need to fill in the `Project-Id-Version` header manually.
+* Translations in templates now provide `pgettext` and `npgettext` methods.
+* Fixed the bug that required manually deleting the pre-filled strings from the English PO file when the site's content language is not English.
+* When updating translated PO files, the content-language PO file strings are automatically filled with the message IDs. Side effects with plurals are documented.
+* A bug with discriminating between Markdown headings and Lektor block separations has been fixed; the older heuristic of checking for colons in the previous line is replaced with simply checking for 3 dashes after stripping the line.
+
 ## 0.5.4
 
 * POT content is now sorted by path.

diff --git a/README.md b/README.md
@@ -40,6 +40,20 @@ A `babel.cfg` must be created in your project root with the following content:
     [jinja2: **/templates/**.html]
     encoding = utf-8
 
+If you plan to extract from your templates, and the templates use functionality provided by Jinja2 extensions, list
+those extensions on an additional `extensions` line in the config file.  For example, if you use `do` statements, the
+configuration file should be:
+```
+[jinja2: **/templates/**.html]
+encoding = utf-8
+extensions = jinja2.ext.do
+```
+
+#### Whitespace Trimming during Extraction
+
+If you're using `{% trans %}` blocks in your template files, note that the `trimmed` policy is enabled for Jinja's i18n extension, so all whitespace is trimmed at the beginning and end of those
+blocks.  However, for PyBabel's extraction to trim the strings in the same way, you must add `trimmed = True` to the jinja2 section of the `babel.cfg` configuration file.
+
 ### Translatable fields
 
 In order for a field to be marked as translatable, an option has to be set in the field definition. Both blocks and flowblocks fields are translatable.
@@ -96,12 +110,6 @@ For example:
 
 As with the previous example, `body` and `title` field content will be translated. However, in this example, `image` and `image_position` will not.
 
-### Non-english content
-
-Due to a limitation of `msginit`, it is difficult to translate a site when the primary language is set to anything but English.
-
-If your default content language is not English, you will have to edit the first `contents-en.po` file and remove the translations.
-
 ## Installation
 
 ### Prerequisites
@@ -166,6 +174,12 @@ All translation files (`contents-*.po`) are then compiled and merged with the or
 
 You must run `lektor build` once to generate the list of `contents-xx.po` files. After that, once a translation change is applied to a `contents-xx.po` file, the site must be built again for the changes to be applied to the associated `contents-xx.lr` file. This results in the changes being rendered on the site.
 
+### Plural Forms
+
+If you're using `{% pluralize %}` or `ngettext` or the like in your Jinja templates, make sure you fill in the plural forms in the PO headers manually, then make sure you have the correct
+number of `msgstr[x]`s.  The plugin automatically fills `msgstr`s into the PO file of your source language (which `msginit` only does for English), but since it doesn't parse plural forms,
+any non-English PO file will not have its plural message strings filled in.  Those must be filled in manually in the source-language PO file if simple singular and plural strings do not suffice.
+
 ### Project file
 
 You must modify the `.lektorproject` file to include the expected languages.

diff --git a/lektor_i18n.py b/lektor_i18n.py
@@ -5,10 +5,11 @@
 import re
 import tempfile
 import time
-from os.path import exists, join, relpath
+from os.path import exists, join, relpath, basename
 from pprint import PrettyPrinter
 from textwrap import dedent
 from urllib.parse import urljoin
+import polib
 
 from lektor.context import get_ctx
 from lektor.db import Page
@@ -55,6 +56,14 @@ def ngettext(self, *x):
         self.init_translator()
         return self.translator.ngettext(*x)
 
+    def pgettext(self, *x):  # mirrors ngettext: positional args are forwarded unchanged
+        self.init_translator()  # runs before each lookup, ahead of using self.translator
+        return self.translator.pgettext(*x)
+
+    def npgettext(self, *x):  # mirrors ngettext: positional args are forwarded unchanged
+        self.init_translator()  # runs before each lookup, ahead of using self.translator
+        return self.translator.npgettext(*x)
+
 
 class Translations:
     """Memory of translations"""
@@ -136,15 +145,29 @@ def write_pot(self, pot_filename, language):
             f.write(self.as_pot(language, header))
 
     @staticmethod
-    def merge_pot(from_filenames, to_filename):
-        msgcat = locate_executable("msgcat")
-        if msgcat is None:
-            msgcat = "/usr/bin/msgcat"
-        cmdline = [msgcat, "--use-first"]
+    def merge_pot(from_filenames, to_filename, projectname):
+        """Merge the POT files in from_filenames into to_filename with xgettext,
+        then restore the POT-Creation-Date read from the first input file."""
+        # Get the POT Creation Date of the first file and inject it later.
+        pattern = r'("POT-Creation-Date:\s*)(\d{4}-\d{2}-\d{2}.*)(\\n")'
+        with open(from_filenames[0], 'r', encoding='utf-8') as f:
+            found = re.search(pattern, f.read())
+        date1 = found.group(2) if found else None  # no date header found: nothing to restore
+
+        xgettext = locate_executable("xgettext") or "/usr/bin/xgettext"
+        cmdline = [xgettext, "--sort-by-file", "--package-name=" + projectname, "--package-version=1.0"]
         cmdline.extend(from_filenames)
         cmdline.extend(("-o", to_filename))
-        reporter.report_debug_info("msgcat cmd line", cmdline)
+        reporter.report_debug_info("xgettext cmd line", cmdline)
         portable_popen(cmdline).wait()
+
+        # Inject the creation date back into the produced file
+        with open(to_filename, 'r', encoding='utf-8') as f:
+            merged = f.read()
+        # A callable replacement inserts the date literally; with no saved date the header is left as is.
+        merged = re.sub(pattern, lambda m: m.group(1) + (date1 or m.group(2)) + m.group(3), merged, count=1)
+        with open(to_filename, 'w', encoding='utf-8') as f:
+            f.write(merged)
 
     @staticmethod
     def parse_templates(to_filename):
@@ -158,6 +181,51 @@ def parse_templates(to_filename):
 
 translations = Translations()  # let's have a singleton
 
+def clear_entry(entry):
+    """Reset one PO entry: blank msgstr and every plural form, drop the fuzzy flag."""
+    entry.msgstr = ''
+    for plural_key in entry.msgstr_plural or ():
+        entry.msgstr_plural[plural_key] = ''
+    if 'fuzzy' in entry.flags:
+        entry.flags.remove('fuzzy')
+
+def clear_translations(po_filepath, save_path=None):
+    """Clear every entry of the PO file, then save it to save_path or back in place."""
+    catalog = polib.pofile(po_filepath)
+    for item in catalog:
+        clear_entry(item)
+    catalog.save(save_path or po_filepath)
+
+def fill_translations(po_filepath, save_path=None):
+    """Fill the empty translations of a source-language PO file with its own msgids.
+
+    Fuzzy entries are cleared and then refilled.  Plural forms are only filled
+    for a file whose basename contains '+en.po': index 0 gets msgid, every other
+    index gets msgid_plural.  The result is saved to save_path, or in place.
+    """
+    catalog = polib.pofile(po_filepath)
+    fills_plurals = '+en.po' in basename(po_filepath)
+
+    for item in catalog:
+        # A fuzzy match has to be re-filled from scratch, so clear it first.
+        # This matters when a plural form is added to a string: msgmerge
+        # seems to copy the singular text into the plural field and mark it
+        # fuzzy, which is incorrect within the source language.
+        if item.fuzzy:
+            clear_entry(item)
+
+        # Within the source language the translation is the msgid itself.
+        if not item.msgstr:
+            item.msgstr = item.msgid
+
+        # Only blank plural slots are filled; existing plural text is kept.
+        if fills_plurals:
+            for form in item.msgstr_plural or ():
+                if not item.msgstr_plural[form]:
+                    item.msgstr_plural[form] = item.msgid if int(form) == 0 else item.msgid_plural
+
+    catalog.save(save_path or po_filepath)
+
 
 class POFile:
     FILENAME_PATTERN = "contents+{}.po"
@@ -186,6 +254,8 @@ def _msg_init(self):
         ]
         reporter.report_debug_info("msginit cmd line", cmdline)
         portable_popen(cmdline, cwd=self.i18npath).wait()
+        clear_translations(os.path.join(self.i18npath, self.FILENAME_PATTERN.format(self.language)))
+        self.reformat()
 
     def _msg_merge(self):
         """Merges an existing <language>.po file with .pot file"""
@@ -201,6 +271,11 @@ def _msg_merge(self):
         ]
         reporter.report_debug_info("msgmerge cmd line", cmdline)
         portable_popen(cmdline, cwd=self.i18npath).wait()
+
+    def reformat(self):
+        """Pass this language's PO file through msgcat, writing the output back to the same file."""
+        po_name = self.FILENAME_PATTERN.format(self.language)
+        portable_popen([locate_executable("msgcat") or "/usr/bin/msgcat", po_name, "-o", po_name], cwd=self.i18npath).wait()
 
     def _prepare_locale_dir(self):
         """Prepares the i18n/<language>/LC_MESSAGES/ to store the .mo file;
@@ -238,20 +313,6 @@ def compile(self):
             self._msg_fmt(locale_dirname)
 
 
-def line_starts_new_block(line, prev_line):
-    """
-    Detect a new block in a Lektor document. Blocks are delimited by a line
-    containing 3 or more dashes. This actually matches the definition of a
-    markdown level 2 heading, so this function returns False if no colon was
-    found in the line before, e.g. it isn't a new block with a key: value pair
-    before.
-    """
-    if not prev_line or ":" not in prev_line:
-        return False  # could be a Markdown heading
-    line = line.strip()
-    return line == "-" * len(line) and len(line) >= 3
-
-
 def split_paragraphs(document):
     if isinstance(document, (list, tuple)):
         document = "".join(document)  # list of lines
@@ -394,19 +455,30 @@ def __parse_source_structure(lines):
         blocks = []
         count_lines_block = 0  # counting the number of lines of the current block
         is_content = False
-        prev_line = None
+        flow_level = 3
         for line in lines:
             stripped_line = line.strip()
             if not stripped_line:  # empty line
                 blocks.append(("raw", "\n"))
                 continue
-            # line like "---*" or a new block tag
-            if line_starts_new_block(stripped_line, prev_line) or block2re.search(
-                stripped_line
-            ):
+            # New block tag.
+            # The following two ifs will determine the start of a new "block" of content that we can further
+            # parse.  Special care is needed, as the number of allowed -s dictates whether it's a Markdown heading
+            # or a flow / field separation.
+            if block2re.search(stripped_line):
+                count_lines_block = 0
+                is_content = False
+                blocks.append(("raw", line))
+                # Count the amount of preceding #s, as that determines the amount of -s allowed
+                # before it gets counted as a Markdown heading.
+                flow_level = len(stripped_line) - len(stripped_line.lstrip('#'))
+            # You're allowed to have between 3 and your maximum allowed number of -s.
+            elif stripped_line == '-' * len(stripped_line) and 3 <= len(stripped_line) <= flow_level:
                 count_lines_block = 0
                 is_content = False
                 blocks.append(("raw", line))
+                # If there's less -s than the flow level, back down on the amount of allowed -s.
+                flow_level = len(stripped_line)
             else:
                 count_lines_block += 1
                 match = command_re.search(stripped_line)
@@ -423,7 +495,6 @@ def __parse_source_structure(lines):
                     is_content = True
             if is_content:
                 blocks.append(("translatable", line))
-            prev_line = line
         # join neighbour blocks of same type
         newblocks = []
         for type, data in blocks:
@@ -558,7 +629,7 @@ def on_after_build_all(self, builder, **extra):
         reporter.report_generic(f"{relpath(pots[0], builder.env.root_path)} generated")
         pots = [p for p in pots if os.path.exists(p)]  # only keep existing ones
         if len(pots) > 1:
-            translations.merge_pot(pots, contents_pot_filename)
+            translations.merge_pot(pots, contents_pot_filename, self.env.project.name)
             reporter.report_generic(
                 f"Merged POT files "
                 f"{', '.join(relpath(p, builder.env.root_path) for p in pots)}"
@@ -567,3 +638,6 @@ def on_after_build_all(self, builder, **extra):
         for language in self.translations_languages:
             po_file = POFile(language, self.i18npath)
             po_file.generate()
+            if language == self.content_language:
+                fill_translations(os.path.join(po_file.i18npath, po_file.FILENAME_PATTERN.format(po_file.language)))
+                po_file.reformat()
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,9 @@ authors = [
 maintainers = [
     {name="BeeWare Team", email="[email protected]"},
 ]
+dependencies = [
+    "polib",
+]
 
 [project.optional-dependencies]
 # Extras used by developers *of* briefcase are pinned to specific versions to