From 0567f5f4c02bf1291b17730b3dc4ddd906eff30f Mon Sep 17 00:00:00 2001 From: gemmaro Date: Mon, 23 Feb 2026 14:53:02 +0900 Subject: [PATCH 1/3] Define YAML separator constant. --- lib/Locale/Po4a/TransTractor.pm | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm index 57054740a..45a0d9638 100644 --- a/lib/Locale/Po4a/TransTractor.pm +++ b/lib/Locale/Po4a/TransTractor.pm @@ -1168,6 +1168,10 @@ sub get_out_charset { return 'UTF-8'; } +# See also "2.2 Structures" section[1]. +# [1] https://yaml.org/spec/1.2.2/#22-structures +use constant YAML_SEPARATOR => "---"; + # Push the translation of a Yaml document or Yaml Front-Matter header, parsed by YAML::Tiny in any case # $is_yfm is a boolean indicating whether we are dealing with a Front Matter (true value) or whole document (false value) sub handle_yaml { @@ -1180,13 +1184,13 @@ sub handle_yaml { # An empty document if ( !defined $cursor ) { - $self->pushline("---\n"); + $self->pushline( YAML_SEPARATOR . "\n" ); # Do nothing # A scalar document } elsif ( !ref $cursor ) { - $self->pushline("---\n"); + $self->pushline( YAML_SEPARATOR . "\n" ); $self->pushline( format_scalar( $self->translate( @@ -1200,19 +1204,19 @@ sub handle_yaml { # A list at the root } elsif ( ref $cursor eq 'ARRAY' ) { if (@$cursor) { - $self->pushline("---\n"); + $self->pushline( YAML_SEPARATOR . "\n" ); do_array( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ); } else { - $self->pushline("---[]\n"); + $self->pushline( YAML_SEPARATOR . "[]\n" ); } # A hash at the root } elsif ( ref $cursor eq 'HASH' ) { if (%$cursor) { - $self->pushline("---\n"); + $self->pushline( YAML_SEPARATOR . "\n" ); do_hash( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ); } else { - $self->pushline("--- {}\n"); + $self->pushline( YAML_SEPARATOR . 
" {}\n" ); } } else { From 46c7ff318f9aecd50d778383c555275d86b3d7be Mon Sep 17 00:00:00 2001 From: gemmaro Date: Mon, 23 Feb 2026 15:22:36 +0900 Subject: [PATCH 2/3] Split YAML front matter module from Text module. * lib/Locale/Po4a/Text.pm (parse_markdown_yaml_front_matter): Move YAML front matter subroutine to the dedicated module. * lib/Locale/Po4a/YamlFrontMatter.pm: Add module with POD document. [parse_yaml_front_matter]: Removed the unnecessary line argument. Renamed to Markdown agnostic one. --- lib/Locale/Po4a/Text.pm | 121 ++++---------------- lib/Locale/Po4a/YamlFrontMatter.pm | 172 +++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 102 deletions(-) create mode 100644 lib/Locale/Po4a/YamlFrontMatter.pm diff --git a/lib/Locale/Po4a/Text.pm b/lib/Locale/Po4a/Text.pm index a241bc79f..4ba6cc3b4 100644 --- a/lib/Locale/Po4a/Text.pm +++ b/lib/Locale/Po4a/Text.pm @@ -50,10 +50,9 @@ use 5.16.0; use strict; use warnings; -use parent qw(Locale::Po4a::TransTractor); +use parent qw(Locale::Po4a::TransTractor Locale::Po4a::YamlFrontMatter); use Locale::Po4a::Common qw(wrap_mod dgettext); -use YAML::Tiny; use Syntax::Keyword::Try; =head1 OPTIONS ACCEPTED BY THIS MODULE @@ -151,10 +150,6 @@ match. If B and B are used together, values are included if they are matched by at least one of the options. Array values are always translated, unless the B option is provided. -=cut - -my %yfm_keys = (); - =item B (markdown only) Allow the YAML Front Matter parser to fail on malformated headers. This is @@ -162,14 +157,8 @@ particularly helpful when your file starts with a horizontal ruler instead of a YAML Front Matter, but you insist on using three dashes only for your ruler. -=cut - -my $yfm_lenient = 0; - =item B (markdown only) -=item B - Comma-separated list of hash paths to process for extraction in the YAML Front Matter section, all other paths are skipped. Paths are matched with a case-sensitive match. 
If B and B are used together, @@ -177,18 +166,10 @@ values are included if they are matched by at least one of the options. Arrays values are always returned unless the B option is provided. -=cut - -my %yfm_paths = (); - =item B (markdown-only) Do not translate array values in the YAML Front Matter section. -=cut - -my $yfm_skip_array = 0; - =item B[B<=>I] Handle Debian's control files. @@ -250,18 +231,20 @@ sub initialize { if ( defined $options{'markdown'} ) { $parse_func = \&parse_markdown; $markdown = 1; + + my %yfm_keys; map { $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them $yfm_keys{$_} = 1 } ( split( ',', $self->{options}{'yfm_keys'} ) ); + $self->{options}{yfm_keys} = \%yfm_keys; + + my %yfm_paths; map { $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them $yfm_paths{$_} = 1 } ( split( ',', $self->{options}{'yfm_paths'} ) ); - - # map { print STDERR "key $_\n"; } (keys %yfm_keys); - $yfm_skip_array = $self->{options}{'yfm_skip_array'}; - $yfm_lenient = $self->{options}{'yfm_lenient'}; + $self->{options}{yfm_paths} = \%yfm_paths; } else { foreach my $opt (qw(yfm_keys yfm_lenient yfm_skip_array)) { die wrap_mod( "po4a::text", dgettext( "po4a", "Option %s is only valid when parsing markdown files." ), @@ -596,83 +579,6 @@ sub parse_markdown_bibliographic_information { } } -# Support YAML Front Matter in Markdown documents -# -# If the text starts with a YAML ---\n separator, the full text until -# the next YAML ---\n separator is considered YAML metadata. The ...\n -# "end of document" separator can be used at the end of the YAML -# block. 
-# -sub parse_markdown_yaml_front_matter { - my ( $self, $line, $blockref ) = @_; - my $yfm; - my @saved_ctn; - my ( $nextline, $nextref ) = $self->shiftline(); - push @saved_ctn, ( $nextline, $nextref ); - while ( defined($nextline) ) { - last if ( $nextline =~ /^(---|\.\.\.)$/ ); - $yfm .= $nextline; - ( $nextline, $nextref ) = $self->shiftline(); - if ( $nextline =~ /: [\[\{]/ ) { - die wrap_mod( - "po4a::text", - dgettext( - "po4a", - "Inline lists and dictionaries on a single line are not correctly handled the parser we use (YAML::Tiny): they are interpreted as regular strings. " - . "Please use multi-lines definitions instead. Offending line:\n %s" - ), - $nextline - ); - - } - push @saved_ctn, ( $nextline, $nextref ); - } - - my $yamlarray; # the parsed YFM content - my $yamlres; # containing the parse error, if any - try { - $yamlarray = YAML::Tiny->read_string($yfm); - } catch { - $yamlres = $@; - } - - if ( defined($yamlres) ) { - if ($yfm_lenient) { - $yamlres =~ s/ at .*$//; # Remove the error localisation in YAML::Tiny die message, if any (for our test) - warn wrap_mod( - "po4a::text", - dgettext( - "po4a", - "Proceeding even if the YAML Front Matter could not be parsed. Remove the 'yfm_lenient' option for a stricter behavior.\nIgnored error: %s" - ), - $yamlres - ); - my $len = ( scalar @saved_ctn ) - 1; - while ( $len >= 0 ) { - $self->unshiftline( $saved_ctn[ $len - 1 ], $saved_ctn[$len] ); - - # print STDERR "Unshift ".$saved_ctn[ $len - 1] ." | ". $saved_ctn[$len] ."\n"; - $len -= 2; - } - return 0; # Not a valid YAML - } else { - die wrap_mod( - "po4a::text", - dgettext( - "po4a", - "Could not get the YAML Front Matter from the file. If you did not intend to add a YAML front matter " - . 
"but an horizontal ruler, please use '----' instead, or pass the 'yfm_lenient' option.\nError: %s\nContent of the YFM: %s" - ), - $yamlres, $yfm - ); - } - } - - $self->handle_yaml( 1, $blockref, $yamlarray, \%yfm_keys, $yfm_skip_array, \%yfm_paths ); - $self->pushline("---\n"); - return 1; # Valid YAML -} - sub parse_markdown { my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_; if ($expect_header) { @@ -685,7 +591,18 @@ sub parse_markdown { parse_markdown_bibliographic_information( $self, $line, $ref ); return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); } elsif ( $line =~ /^---$/ ) { - if ( parse_markdown_yaml_front_matter( $self, $line, $ref ) ) { # successfully parsed + if ( + $self->parse_yaml_front_matter( + $ref, + { + keys => $self->{options}{yfm_keys}, + skip_array => $self->{options}{yfm_skip_array}, + paths => $self->{options}{yfm_paths}, + lenient => $self->{options}{yfm_lenient}, + } + ) + ) + { # successfully parsed return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); } diff --git a/lib/Locale/Po4a/YamlFrontMatter.pm b/lib/Locale/Po4a/YamlFrontMatter.pm new file mode 100644 index 000000000..950042217 --- /dev/null +++ b/lib/Locale/Po4a/YamlFrontMatter.pm @@ -0,0 +1,172 @@ +#!/usr/bin/perl -w + +# Po4a::Text.pm +# +# extract and translate translatable strings from a text documents +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::YamlFrontMatter - parse YAML front matter + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::YamlFrontMatter is a module to parse YAML front matter, +especially in Markdown documents. This is intended to be used by +other format modules such as the C<Locale::Po4a::Text> module. + +=cut + +package Locale::Po4a::YamlFrontMatter; + +use 5.16.0; +use strict; +use warnings; + +use parent qw(Locale::Po4a::TransTractor); + +use Locale::Po4a::Common qw(wrap_mod dgettext); +use YAML::Tiny; +use Syntax::Keyword::Try; + +=head1 FUNCTIONS + +=head2 C<parse_yaml_front_matter> + +Parse YAML Front Matter (especially in Markdown documents). + +If the text starts with a YAML C<---\n> separator, the full text until +the next YAML C<---\n> separator is considered YAML metadata. The +C<...\n> "end of document" separator can be used at the end of the +YAML block. + +It takes two arguments, C<$ref> and C<$options>. C<$options> is a +hash reference which has keys C<keys>, C<skip_array>, C<paths>, and +C<lenient>. + +Returns a truthy value if the front matter is valid YAML; otherwise returns a falsy +value (when C<lenient> is set) or dies.
+ +=cut + +sub parse_yaml_front_matter { + my ( $self, $blockref, $options ) = @_; + my $keys = $options->{keys}; + my $skip_array = $options->{skip_array}; + my $paths = $options->{paths}; + my $lenient = $options->{lenient}; + + my $yfm; + my @saved_ctn; + my ( $nextline, $nextref ) = $self->shiftline(); + push @saved_ctn, ( $nextline, $nextref ); + while ( defined($nextline) ) { + last if ( $nextline =~ /^(---|\.\.\.)$/ ); + $yfm .= $nextline; + ( $nextline, $nextref ) = $self->shiftline(); + if ( $nextline =~ /: [\[\{]/ ) { + die wrap_mod( + "po4a::text", + dgettext( + "po4a", + "Inline lists and dictionaries on a single line are not correctly handled the parser we use (YAML::Tiny): they are interpreted as regular strings. " + . "Please use multi-lines definitions instead. Offending line:\n %s" + ), + $nextline + ); + + } + push @saved_ctn, ( $nextline, $nextref ); + } + + my $yamlarray; # the parsed YFM content + my $yamlres; # containing the parse error, if any + try { + $yamlarray = YAML::Tiny->read_string($yfm); + } catch { + $yamlres = $@; + } + + if ( defined($yamlres) ) { + if ($lenient) { + $yamlres =~ s/ at .*$//; # Remove the error localisation in YAML::Tiny die message, if any (for our test) + warn wrap_mod( + "po4a::text", + dgettext( + "po4a", + "Proceeding even if the YAML Front Matter could not be parsed. Remove the 'yfm_lenient' option for a stricter behavior.\nIgnored error: %s" + ), + $yamlres + ); + my $len = ( scalar @saved_ctn ) - 1; + while ( $len >= 0 ) { + $self->unshiftline( $saved_ctn[ $len - 1 ], $saved_ctn[$len] ); + + # print STDERR "Unshift ".$saved_ctn[ $len - 1] ." | ". $saved_ctn[$len] ."\n"; + $len -= 2; + } + return 0; # Not a valid YAML + } else { + die wrap_mod( + "po4a::text", + dgettext( + "po4a", + "Could not get the YAML Front Matter from the file. If you did not intend to add a YAML front matter " + . 
"but an horizontal ruler, please use '----' instead, or pass the 'yfm_lenient' option.\nError: %s\nContent of the YFM: %s" + ), + $yamlres, $yfm + ); + } + } + + $self->handle_yaml( + 1, $blockref, $yamlarray, + $self->{options}{yfm_keys}, + $self->{options}{yfm_skip_array}, + $self->{options}{yfm_paths} + ); + $self->pushline("---\n"); + return 1; # Valid YAML +} + +1; + +__END__ + +=head1 AUTHORS + + Nicolas François + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2005-2008 Nicolas FRANÇOIS . + + Copyright © 2008-2009, 2018 Jonas Smedegaard . + Copyright © 2020 Martin Quinson . + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut From 358ae56cb7c424939788627ab0ef4f789265e5ef Mon Sep 17 00:00:00 2001 From: gemmaro Date: Mon, 23 Feb 2026 16:10:37 +0900 Subject: [PATCH 3/3] Keep the last line of YAML front matter section. --- lib/Locale/Po4a/YamlFrontMatter.pm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/Locale/Po4a/YamlFrontMatter.pm b/lib/Locale/Po4a/YamlFrontMatter.pm index 950042217..49ff55fa4 100644 --- a/lib/Locale/Po4a/YamlFrontMatter.pm +++ b/lib/Locale/Po4a/YamlFrontMatter.pm @@ -82,8 +82,12 @@ sub parse_yaml_front_matter { my @saved_ctn; my ( $nextline, $nextref ) = $self->shiftline(); push @saved_ctn, ( $nextline, $nextref ); + my $last_line; while ( defined($nextline) ) { - last if ( $nextline =~ /^(---|\.\.\.)$/ ); + if ( $nextline =~ /^(---|\.\.\.)$/ ) { + $last_line = $nextline; + last; + } $yfm .= $nextline; ( $nextline, $nextref ) = $self->shiftline(); if ( $nextline =~ /: [\[\{]/ ) { @@ -147,7 +151,7 @@ sub parse_yaml_front_matter { $self->{options}{yfm_skip_array}, $self->{options}{yfm_paths} ); - $self->pushline("---\n"); + $self->pushline($last_line); return 1; # Valid YAML }