Skip to content

Commit 6698ad4

Browse files
authored
fix(SPEC): update SPEC and fix some inconsistencies (#5206)
The change updates the SPEC to reflect the current status of Flux. As such, the SPEC is no longer a _living_ document and instead describes current behaviors. The few IMPL issues that were not complete are left as feature requests for the future.

Summary of the changes:
- Specific docs about universe functions have been removed
- References to the standard library have been added
- `empty` and `in` keywords have been removed
- Missing operators added
- Numeric literals removed
- Regex escape sequences have been updated in the SPEC and the implementation was updated to match the SPEC
- Duration literal add/sub examples have been updated
- Shorthand date time literals have been updated
- Add docs about stream type and remove generator type
1 parent c46e047 commit 6698ad4

File tree

12 files changed

+604
-3541
lines changed

12 files changed

+604
-3541
lines changed

docs/SPEC.md

Lines changed: 81 additions & 3331 deletions
Large diffs are not rendered by default.

internal/parser/strconv.go

Lines changed: 73 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,17 @@ import (
1818
func ParseTime(lit string) (time.Time, error) {
1919
if !strings.Contains(lit, "T") {
2020
// This is a date.
21-
return time.Parse("2006-01-02", lit)
21+
t, err := time.Parse("2006-01-02", lit)
22+
if err != nil {
23+
return time.Time{}, errors.New(codes.Invalid, "cannot parse date")
24+
}
25+
return t, nil
26+
}
27+
t, err := time.Parse(time.RFC3339Nano, lit)
28+
if err != nil {
29+
return time.Time{}, errors.New(codes.Invalid, "cannot parse date time")
2230
}
23-
// todo(jsternberg): need to also parse when there is no time offset.
24-
return time.Parse(time.RFC3339Nano, lit)
31+
return t, nil
2532
}
2633

2734
// MustParseTime parses a time literal and panics in the case of an error.
@@ -138,16 +145,11 @@ func writeNextUnescapedRune(s string, builder *strings.Builder) (width int, err
138145
case '$':
139146
r = '$'
140147
case 'x':
141-
// Decode two hex chars as a single byte
142-
if len(s[width:]) < 2 {
143-
return 0, fmt.Errorf("invalid byte value %q", s[width:])
148+
b, err := fromHexDigits(s[width:])
149+
if err != nil {
150+
return 0, err
144151
}
145-
ch1, ok1 := fromHexChar(s[width])
146-
ch2, ok2 := fromHexChar(s[width+1])
147-
if !ok1 || !ok2 {
148-
return 0, fmt.Errorf("invalid byte value %q", s[width:])
149-
}
150-
builder.WriteByte((ch1 << 4) | ch2)
152+
builder.WriteByte(b)
151153
return width + 2, nil
152154
default:
153155
return 0, fmt.Errorf("invalid escape character %q", next)
@@ -160,6 +162,20 @@ func writeNextUnescapedRune(s string, builder *strings.Builder) (width int, err
160162
return
161163
}
162164

165+
// fromHexDigits decodes a single byte from two hex digits from the string or an error
166+
func fromHexDigits(s string) (byte, error) {
167+
// Decode two hex chars as a single byte
168+
if len(s) < 2 {
169+
return 0, errors.New(codes.Invalid, "expected 2 hex characters")
170+
}
171+
ch1, ok1 := fromHexChar(s[0])
172+
ch2, ok2 := fromHexChar(s[1])
173+
if !ok1 || !ok2 {
174+
return 0, fmt.Errorf("invalid byte value %q", s)
175+
}
176+
return ((ch1 << 4) | ch2), nil
177+
}
178+
163179
// fromHexChar converts a hex character into its value and a success flag.
164180
func fromHexChar(c byte) (byte, bool) {
165181
switch {
@@ -186,8 +202,50 @@ func ParseRegexp(lit string) (*regexp.Regexp, error) {
186202
}
187203

188204
expr := lit[1 : len(lit)-1]
189-
if index := strings.Index(expr, "\\/"); index != -1 {
190-
expr = strings.Replace(expr, "\\/", "/", -1)
205+
// Unescape regex literal
206+
var (
207+
builder strings.Builder
208+
width, pos int
209+
err error
210+
)
211+
builder.Grow(len(expr))
212+
for pos < len(expr) {
213+
width, err = writeNextUnescapedRegexRune(expr[pos:], &builder)
214+
if err != nil {
215+
return nil, err
216+
}
217+
pos += width
218+
}
219+
return regexp.Compile(builder.String())
220+
221+
}
222+
223+
// writeNextUnescapedRegexRune writes a rune to builder from s.
224+
// The rune is the next decoded UTF-8 rune with regex escaping rules applied.
225+
func writeNextUnescapedRegexRune(s string, builder *strings.Builder) (int, error) {
226+
r, width := utf8.DecodeRuneInString(s)
227+
if r == '\\' {
228+
next, w := utf8.DecodeRuneInString(s[width:])
229+
width += w
230+
switch next {
231+
case '/':
232+
builder.WriteRune('/')
233+
return width, nil
234+
case 'x':
235+
b, err := fromHexDigits(s[width:])
236+
if err != nil {
237+
return 0, err
238+
}
239+
builder.WriteByte(b)
240+
return width + 2, nil
241+
default:
242+
// Standard regexp escape characters may exist,
243+
// we leave them alone and let Go's regex parser validate them.
244+
builder.WriteRune('\\')
245+
builder.WriteRune(next)
246+
return width, nil
247+
}
191248
}
192-
return regexp.Compile(expr)
249+
builder.WriteRune(r)
250+
return width, nil
193251
}

internal/parser/strconv_test.go

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package parser_test
22

33
import (
44
"testing"
5+
"time"
56

67
"github.com/google/go-cmp/cmp"
78
"github.com/influxdata/flux/ast"
@@ -165,3 +166,155 @@ func TestParseDuration(t *testing.T) {
165166
})
166167
}
167168
}
169+
170+
func TestParseTime(t *testing.T) {
171+
testCases := []struct {
172+
testName string
173+
time string
174+
want time.Time
175+
err error
176+
}{
177+
{
178+
testName: "RFC3339Nano",
179+
time: "2022-09-14T04:37:17.123456789Z",
180+
want: time.Date(2022, 9, 14, 4, 37, 17, 123456789, time.UTC),
181+
},
182+
{
183+
testName: "millis",
184+
time: "2022-09-14T04:37:17.123Z",
185+
want: time.Date(2022, 9, 14, 4, 37, 17, 123000000, time.UTC),
186+
},
187+
{
188+
testName: "date time offset",
189+
time: "2022-09-14T04:37:17.123456789-07:00",
190+
want: time.Date(2022, 9, 14, 4, 37, 17, 123456789, time.FixedZone("", -7*60*60)),
191+
},
192+
{
193+
testName: "date only",
194+
time: "2022-09-14",
195+
want: time.Date(2022, 9, 14, 0, 0, 0, 0, time.UTC),
196+
},
197+
{
198+
testName: "date only error",
199+
time: "2022-00-14",
200+
err: errors.New(codes.Invalid, "cannot parse date"),
201+
},
202+
{
203+
testName: "date time no offset",
204+
time: "2022-09-14T04:37:17.123456789",
205+
err: errors.New(codes.Invalid, "cannot parse date time"),
206+
},
207+
}
208+
209+
for _, tc := range testCases {
210+
tc := tc
211+
t.Run(tc.testName, func(t *testing.T) {
212+
result, err := parser.ParseTime(tc.time)
213+
214+
if err != nil && tc.err == nil {
215+
t.Errorf("Unexpected error: %v", err)
216+
} else if tc.err != nil && tc.err == nil {
217+
t.Errorf("Expected error but got nil: %v", tc.err)
218+
} else if tc.err != nil && !cmp.Equal(err, tc.err) {
219+
t.Errorf("Expected time error: %v", cmp.Diff(err, tc.err))
220+
} else if !cmp.Equal(result, tc.want) {
221+
t.Errorf("Expected time values to be eq: %v", cmp.Diff(result, tc.want))
222+
}
223+
})
224+
}
225+
}
226+
227+
func TestParseString(t *testing.T) {
228+
testCases := []struct {
229+
testName string
230+
str string
231+
want string
232+
err error
233+
}{
234+
{
235+
testName: "normal",
236+
str: `"hello world"`,
237+
want: "hello world",
238+
},
239+
{
240+
testName: "escape sequences",
241+
str: `"newline\n
242+
carriage return\r
243+
horizontal tab\t
244+
double quote \"
245+
backslash \\
246+
dollar curly braket \${
247+
"`,
248+
249+
want: "newline\n\ncarriage return\r\nhorizontal tab\t\ndouble quote \"\nbackslash \\\ndollar curly braket ${\n",
250+
},
251+
{
252+
testName: "hex escape sequences",
253+
str: `"\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e"`,
254+
want: "日本語",
255+
},
256+
}
257+
258+
for _, tc := range testCases {
259+
tc := tc
260+
t.Run(tc.testName, func(t *testing.T) {
261+
result, err := parser.ParseString(tc.str)
262+
263+
if err != nil && tc.err == nil {
264+
t.Errorf("Unexpected error: %v", err)
265+
} else if tc.err != nil && tc.err == nil {
266+
t.Errorf("Expected error but got nil: %v", tc.err)
267+
} else if tc.err != nil && !cmp.Equal(err, tc.err) {
268+
t.Errorf("Expected string error: %v", cmp.Diff(err, tc.err))
269+
} else if !cmp.Equal(result, tc.want) {
270+
t.Errorf("Expected string values to be eq: %v", cmp.Diff(result, tc.want))
271+
}
272+
})
273+
}
274+
}
275+
276+
func TestParseRegex(t *testing.T) {
277+
testCases := []struct {
278+
testName string
279+
str string
280+
want string
281+
err error
282+
}{
283+
{
284+
testName: "normal",
285+
str: `/hello world/`,
286+
want: "hello world",
287+
},
288+
{
289+
testName: "escape sequences",
290+
str: `/forward slash \/ character classes: \w\s\d/`,
291+
want: `forward slash / character classes: \w\s\d`,
292+
},
293+
{
294+
testName: "hex escape sequences",
295+
str: `/\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e/`,
296+
want: "日本語",
297+
},
298+
}
299+
300+
for _, tc := range testCases {
301+
tc := tc
302+
t.Run(tc.testName, func(t *testing.T) {
303+
regex, err := parser.ParseRegexp(tc.str)
304+
var result string
305+
if regex != nil {
306+
result = regex.String()
307+
}
308+
309+
if err != nil && tc.err == nil {
310+
t.Errorf("Unexpected error: %v", err)
311+
} else if tc.err != nil && tc.err == nil {
312+
t.Errorf("Expected error but got nil: %v", tc.err)
313+
} else if tc.err != nil && !cmp.Equal(err, tc.err) {
314+
t.Errorf("Expected regexp error: %v", cmp.Diff(err, tc.err))
315+
} else if !cmp.Equal(result, tc.want) {
316+
t.Errorf("Expected regexp values to be eq: %v", cmp.Diff(result, tc.want))
317+
}
318+
})
319+
}
320+
}

libflux/flux-core/src/parser/mod.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,6 +1457,10 @@ impl<'input> Parser<'input> {
14571457
}
14581458

14591459
/// Creates a bad expression for `t` using the default message, which names
/// the offending token type, by delegating to
/// `create_bad_expression_with_text`.
fn create_bad_expression(&mut self, t: Token) -> Expression {
    let message = format!("invalid token for primary expression: {}", t.tok);
    self.create_bad_expression_with_text(t, message)
}
1463+
fn create_bad_expression_with_text(&mut self, t: Token, text: String) -> Expression {
14601464
Expression::Bad(Box::new(BadExpr {
14611465
// Do not use `self.base_node_*` in order not to steal errors.
14621466
// The BadExpr is an error per se. We want to leave errors to parents.
@@ -1467,7 +1471,7 @@ impl<'input> Parser<'input> {
14671471
),
14681472
..BaseNode::default()
14691473
},
1470-
text: format!("invalid token for primary expression: {}", t.tok),
1474+
text,
14711475
expression: None,
14721476
}))
14731477
}
@@ -1513,7 +1517,13 @@ impl<'input> Parser<'input> {
15131517
let lit = self.parse_time_literal();
15141518
match lit {
15151519
Ok(lit) => Expression::DateTime(lit),
1516-
Err(terr) => self.create_bad_expression(terr.token),
1520+
Err(terr) => match terr.token.tok {
1521+
TokenType::Time => self.create_bad_expression_with_text(
1522+
terr.token,
1523+
"invalid date time literal, missing time offset".to_string(),
1524+
),
1525+
_ => self.create_bad_expression(terr.token),
1526+
},
15171527
}
15181528
}
15191529
TokenType::Duration => {

0 commit comments

Comments
 (0)