Skip to content

Commit 5181196

Browse files
authored
Always include whitespace in string literals (#688)
* Always include whitespace in string literals
  Signed-off-by: Shane Loretz <[email protected]>
* Add tests for #676
  Signed-off-by: Shane Loretz <[email protected]>
1 parent e28e487 commit 5181196

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

rosidl_parser/rosidl_parser/grammar.lark

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,14 @@
1717
%import common.WS
1818
%ignore WS
1919

20+
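// Copied from lark-parser rather than imported, so that each literal is a single terminal and a wide string does not match when whitespace separates the L from the opening quote, e.g. `L "white space between L and quote"`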
21+
// https://github.com/lark-parser/lark/blob/953171821ed307f700fddf27f3fcb9483346bd46/lark/grammars/common.lark#L26-L29
22+
_STRING_INNER: /.*?/
23+
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
24+
25+
ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""
26+
ESCAPED_WIDE_STRING: "L\"" _STRING_ESC_INNER "\""
27+
2028

2129
// 7.2.2 Comments
2230
COMMENT: "//" /[^\n]/*
@@ -71,11 +79,8 @@ _ESCAPE_SEQUENCES: "\\n" | "\\t" | "\\v" | "\\b" | "\\r" | "\\f" | "\\a" | "\\\\
7179
// adjacent string literals are concatenated
7280
string_literals: string_literal+
7381
wide_string_literals: wide_string_literal+
74-
// string_literal: "\"" CHAR* "\""
75-
// wide_string_literal: "L\"" CHAR* "\""
76-
// replace precise rules based on the spec with regex for parsing performance
77-
string_literal: "\"\"" | "\"" /(\\\"|[^"])+/ "\""
78-
wide_string_literal: "L\"\"" | "L\"" /(\\\"|[^"])+/ "\""
82+
string_literal: ESCAPED_STRING
83+
wide_string_literal: ESCAPED_WIDE_STRING
7984

8085
// 7.2.6.4 Floating-point Literals
8186
floating_pt_literal: FLOAT

rosidl_parser/rosidl_parser/parser.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -628,6 +628,16 @@ def get_string_literal_value(string_literal, *, allow_unicode=False):
628628
assert isinstance(child, Token)
629629
value = child.value
630630

631+
assert child.type in ('ESCAPED_STRING', 'ESCAPED_WIDE_STRING')
632+
if 'ESCAPED_WIDE_STRING' == child.type:
633+
assert len(value) >= 3
634+
# Get rid of leading L" and trailing "
635+
value = value[2:-1]
636+
else:
637+
assert len(value) >= 2
638+
# Get rid of leading " and trailing "
639+
value = value[1:-1]
640+
631641
regex = _get_escape_sequences_regex(allow_unicode=allow_unicode)
632642
value = regex.sub(_decode_escape_sequence, value)
633643
# unescape double quote and backslash if preceded by a backslash

rosidl_parser/test/test_parser.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@
3030
from rosidl_parser.definition import UnboundedSequence
3131
from rosidl_parser.definition import UnboundedString
3232
from rosidl_parser.definition import UnboundedWString
33+
from rosidl_parser.parser import get_ast_from_idl_string
34+
from rosidl_parser.parser import get_string_literals_value
3335
from rosidl_parser.parser import parse_idl_file
3436

3537
MESSAGE_IDL_LOCATOR = IdlLocator(
@@ -45,6 +47,38 @@ def message_idl_file():
4547
return parse_idl_file(MESSAGE_IDL_LOCATOR)
4648

4749

50+
def test_whitespace_at_start_of_string():
51+
# Repeat to check ros2/rosidl#676
52+
for _ in range(10):
53+
ast = get_ast_from_idl_string('const string foo = " e";')
54+
token = next(ast.find_pred(lambda t: 'string_literals' == t.data))
55+
assert ' e' == get_string_literals_value(token)
56+
57+
58+
def test_whitespace_at_start_of_wide_string():
59+
# Repeat to check ros2/rosidl#676
60+
for _ in range(10):
61+
ast = get_ast_from_idl_string('const wstring foo = L" e";')
62+
token = next(ast.find_pred(lambda t: 'wide_string_literals' == t.data))
63+
assert ' e' == get_string_literals_value(token, allow_unicode=True)
64+
65+
66+
def test_whitespace_at_end_of_string():
67+
# Repeat to check ros2/rosidl#676
68+
for _ in range(10):
69+
ast = get_ast_from_idl_string('const string foo = "e ";')
70+
token = next(ast.find_pred(lambda t: 'string_literals' == t.data))
71+
assert 'e ' == get_string_literals_value(token)
72+
73+
74+
def test_whitespace_at_end_of_wide_string():
75+
# Repeat to check ros2/rosidl#676
76+
for _ in range(10):
77+
ast = get_ast_from_idl_string('const wstring foo = L"e ";')
78+
token = next(ast.find_pred(lambda t: 'wide_string_literals' == t.data))
79+
assert 'e ' == get_string_literals_value(token, allow_unicode=True)
80+
81+
4882
def test_message_parser(message_idl_file):
4983
messages = message_idl_file.content.get_elements_of_type(Message)
5084
assert len(messages) == 1

0 commit comments

Comments
 (0)