Skip to content

Commit 53f0f9b

Browse files
authored
Merge pull request #6 from alerque/format-ast
Massage PEG grammar's AST to match the Fluent reference implementation
2 parents 0c0ac18 + 9e019dd commit 53f0f9b

File tree

169 files changed

+11042
-6899
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

169 files changed

+11042
-6899
lines changed

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@ As of yet this does nothing *usable* (see [lua alternatives](#alternatives)). I'
1919

2020
**Update 2019-09-14**: I've completed a PEG grammar based parser for the entire 1.0 Fluent file format spec. All the pieces are there, but it's only partially tested. It at least parses a few basic types of entries. The AST it returns is straight out of *luaepnf* and probably needs massaging to match the reference ones (via capture groups?), then it needs testing against the upstream fixtures.
2121

22+
**Update 2019-09-24**: The AST returned by the PEG grammar has been massaged to be usable for some basic cases. A basic Lua API is starting to take shape, modeled most closely to the Python implementation. It is possible to load almost any FTL file, and possible to format any messages that are plain strings (no parameters, attributes, functions, or other jazz yet). Note the usage is *off* because there is no locale handling yet, so it's only usable with separate instances per locale. Also `add_messages()` likely only works once, so cram your whole FTL resource in there for now.
23+
24+
## Usage
25+
26+
```lua
27+
local FluentBundle = require("fluent")
28+
29+
local en = FluentBundle("en-US")
30+
31+
en:add_messages("foo = bar")
32+
33+
en:format("foo")
34+
```
35+
2236
## Alternative(s)
2337

2438
If you need something that works in Lua *now*, have a look at the already mature `i18n.lua` project ([Github](https://github.com/kikito/i18n.lua) / [LuaRocks](https://luarocks.org/modules/kikito/i18n)). It implements many of the same features this project will, just without the interoperability with other Fluent based tools. The Lua API it provides is quite nice, but your localization data needs to be provided in Lua tables instead of FTL files. While Fluent has quite a few more tricks up its sleeve the *i18n* module already has working interpolation, pluralization, locale fallbacks, and more. And it works now, today.

fluent/init.lua

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,31 @@
11
-- External dependencies
22
local class = require("pl.class")
3+
local tablex = require("pl.tablex")
34

45
-- Internal modules
56
local FluentSyntax = require("fluent.syntax")
67

7-
8-
local FluentMessages = class({
9-
})
10-
118
local FluentBundle = class({
129
locale = nil,
10+
locales = {},
11+
syntax = FluentSyntax(),
1312

1413
_init = function (self, locale)
1514
self.locale = locale
16-
self.messages = FluentMessages()
17-
self.syntax = FluentSyntax()
1815
end,
1916

2017
add_messages = function (self, input)
21-
for k, v in input:gmatch("(%w+) = (%w+)") do
22-
self.messages[k] = v
23-
end
18+
if type(input) == "string" then input = { input } end
19+
-- TODO: add way to add two resources together, then reduce instead of unpacking this
20+
local resource = tablex.imap(function (v) return self.syntax:parsestring(v) end, input)
21+
self.locales[self.locale] = resource[1]
2422
end,
2523

26-
format = function (self, key)
27-
return self.messages[key]
24+
format = function (self, identifier, parameters)
25+
local resource = self.locales[self.locale]
26+
local message = resource:get_message(identifier)
27+
-- local message = resource[identifier]
28+
return message:format(parameters)
2829
end
2930
})
3031

fluent/messages.lua

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- External dependencies
2+
local class = require("pl.class")
3+
4+
local FluentMessages = class({
5+
})
6+
7+
return FluentMessages
8+

fluent/parser.lua

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
-- External dependencies
2+
local class = require("pl.class")
3+
local epnf = require("epnf")
4+
5+
local nulleof = "NULL\000"
6+
local eol = function () return "\n" end
7+
8+
-- UTF8 code points up to four-byte encodings
9+
local function f1 (s)
10+
return string.byte(s)
11+
end
12+
local function f2 (s)
13+
local c1, c2 = string.byte(s, 1, 2)
14+
return c1 * 64 + c2 - 12416
15+
end
16+
local function f3 (s)
17+
local c1, c2, c3 = string.byte(s, 1, 3)
18+
return (c1 * 64 + c2) * 64 + c3 - 925824
19+
end
20+
local function f4 (s)
21+
local c1, c2, c3, c4 = string.byte(s, 1, 4)
22+
return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
23+
end
24+
local cont = "\128\191"
25+
26+
-- luacheck: push ignore
27+
local ftl_eof = epnf.define(function (_ENV)
28+
-- Matches input that already ends with the sentinel: any run of characters
-- that does not begin the sentinel, then the sentinel, then end of input.
-- A bare `1^0` is ordinary Lua arithmetic (it evaluates to the number 1), not
-- an LPeg repetition, so the any-character pattern is built with P(1).
eol_eof = (P(1) - P(nulleof))^0 * P(nulleof) * -1
29+
START("eol_eof")
30+
end)
31+
32+
local ftl_grammar = epnf.define(function (_ENV)
33+
local blank_inline = P" "^1
34+
local line_end = P"\r\n" / eol + P"\n" + P(nulleof)
35+
blank_block = C((blank_inline^-1 * line_end)^1); local blank_block = V"blank_block"
36+
local blank = (blank_inline + line_end)^1
37+
local digits = R"09"^1
38+
local special_text_char = P"{" + P"}"
39+
local any_char = R("\0\127") / f1 + R("\194\223") * R(cont) / f2 + R("\224\239") * R(cont) * R(cont) / f3 + R("\240\244") * R(cont) * R(cont) * R(cont) / f4
40+
local text_char = any_char - special_text_char - line_end
41+
local special_quoted_char = P'"' + P"\\"
42+
local special_escape = P"\\" * special_quoted_char
43+
-- A unicode escape is "\u" followed by exactly four hex digits, or "\U"
-- followed by exactly six. LPeg's `patt^n` means "n or more" and `P(n)` means
-- "any n characters", so the exact counts are spelled out by concatenation.
local hex_digit = R("09", "af", "AF")
local unicode_escape = (P"\\u" * hex_digit * hex_digit * hex_digit * hex_digit) + (P"\\U" * hex_digit * hex_digit * hex_digit * hex_digit * hex_digit * hex_digit)
44+
local quoted_char = (any_char - special_quoted_char - line_end) + special_escape + unicode_escape
45+
local indented_char = text_char - P"{" - P"*" - P"."
46+
Identifier = Cg(R("az", "AZ") * (R("az", "AZ", "09") + P"_" + P"-")^0, "name")
47+
-- Zero or more variants may follow the default variant as well as precede it;
-- requiring exactly one trailing variant rejects lists that end on the default.
local variant_list = V"Variant"^0 * V"DefaultVariant" * V"Variant"^0 * line_end
48+
Variant = line_end * blank^-1 * V"VariantKey" * blank_inline^-1 * V"Pattern"
49+
DefaultVariant = line_end * blank^-1 * P"*" * V"VariantKey" * blank_inline^-1 * V"Pattern"
50+
VariantKey = P"[" * blank^-1 * (V"NumberLiteral" + V"Identifier") * blank^-1 * P"]"
51+
NumberLiteral = P"-"^-1 * digits * (P"." * digits)^-1
52+
local inline_placeable = P"{" * blank^-1 * (V"SelectExpression" + V"InlineExpression") * blank^-1 * P"}"
53+
local block_placeable = blank_block * blank_inline^-1 * inline_placeable
54+
local inline_text = text_char^1
55+
local block_text = blank_block * blank_inline * indented_char * inline_text^-1
56+
StringLiteral = P'"' * quoted_char^0 * P'"'
57+
FunctionReference = V"Identifier" * V"CallArguments"
58+
MessageReference = V"Identifier" * V"AttributeAccessor"^-1
59+
TermReference = P"-" * V"Identifier" * V"AttributeAccessor"^-1 * V"CallArguments"^-1
60+
VariableReference = P"$" * V"Identifier"
61+
AttributeAccessor = P"." * V"Identifier"
62+
NamedArgument = V"Identifier" * blank^-1 * P":" * blank^-1 * (V"StringLiteral" + V"NumberLiteral")
63+
Argument = V"NamedArgument" + V"InlineExpression"
64+
local argument_list = (V"Argument" * blank^-1 * P"," * blank^-1)^0 * V"Argument"^-1
65+
CallArguments = blank^-1 * P"(" * blank^-1 * argument_list * blank^-1 * P")"
66+
SelectExpression = V"InlineExpression" * blank^-1 * P"->" * blank_inline^-1 * variant_list
67+
InlineExpression = V"StringLiteral" + V"NumberLiteral" + V"FunctionReference" + V"MessageReference" + V"TermReference" + V"VariableReference" + inline_placeable
68+
PatternElement = Cg(C(inline_text + block_text + inline_placeable + block_placeable), "value")
69+
Pattern = V"PatternElement"^1
70+
Attribute = line_end * blank^-1 * P"." * V"Identifier" * blank_inline^-1 * "=" * blank_inline^-1 * V"Pattern"
71+
local junk_line = (1-line_end)^0 * (P"\n" + P(nulleof))
72+
Junk = Cg(junk_line * (junk_line - P"#" - P"-" - R("az","AZ"))^0, "content")
73+
local comment_char = any_char - line_end
74+
CommentLine = Cg(P"###" + P"##" + P"#", "sigil") * (" " * Cg(C(comment_char^0), "content"))^-1 * line_end
75+
Term = P"-" * V"Identifier" * blank_inline^-1 * "=" * blank_inline^-1 * V"Pattern" * V"Attribute"^0
76+
Message = V"Identifier" * blank_inline^-1 * P"=" * blank_inline^-1 * ((V"Pattern" * V"Attribute"^0) + V"Attribute"^1)
77+
Entry = (V"Message" * line_end) + (V"Term" * line_end) + V"CommentLine"
78+
Resource = (V"Entry" + blank_block + V"Junk")^0 * (P(nulleof) + EOF"unparsable input")
79+
START("Resource")
80+
end)
81+
-- luacheck: pop
82+
83+
-- TODO: if this doesn't need any state information, make it a function instead of a class
84+
local FluentParser = class({
85+
_init = function (self, input)
86+
-- Reject non-string input up front. The previous `a and b or c` chain also
-- raised "unknown input type" whenever parsing a valid string yielded nil,
-- which misreported a parse failure as a type error.
if type(input) ~= "string" then error("unknown input type") end
return self:parsestring(input) or error("unable to parse input")
87+
end,
88+
89+
addtrailingnewine = function(input)
90+
local hasnulleof = epnf.parsestring(ftl_eof, input)
91+
return type(hasnulleof) == "nil" and input..nulleof or input
92+
end,
93+
94+
parsestring = function (self, input)
95+
input = self.addtrailingnewine(input)
96+
return epnf.parsestring(ftl_grammar, input)
97+
end
98+
})
99+
100+
return FluentParser

0 commit comments

Comments
 (0)