@@ -10,32 +10,46 @@ with Liblktlang.Public_Converters; use Liblktlang.Public_Converters;
1010
1111package body Liblktlang.Implementation.Extensions is
1212
13- function Common_Denoted_String (Node : Bare_Lkt_Node) return String_Type;
13+ function Common_Denoted_String
14+ (Node : Bare_Lkt_Node) return Internal_Decoded_String_Value;
1415 -- Common implementation for the ``p_denoted_string`` property of all
1516 -- string/pattern literal nodes.
1617
1718 procedure Read_Denoted_Char
18- (Buffer : Text_Type;
19- Cursor : in out Positive;
20- Result : out Character_Type);
21- -- Read the next denoted character starting at ``Buffer (Cursor)``. Upon
22- -- return, ``Cursor`` points to the first item in ``Buffer`` for the next
23- -- character to read (or to the closing double quote if the character read
24- -- was the last one), and ``Result`` is set to the character that was just
25- -- read.
19+ (Buffer : Text_Type;
20+ For_Char_Lit : Boolean;
21+ Cursor : in out Positive;
22+ Cursor_Sloc : in out Source_Location;
23+ Result : out Internal_Decoded_Char_Value);
24+ -- Read the next denoted character starting at ``Buffer (Cursor)``.
25+ --
26+ -- The location of the character at ``Buffer (Cursor)`` must be passed to
27+ -- ``Cursor_Sloc``, which is updated to follow the evolution of ``Cursor``.
28+ --
29+ -- Upon return, ``Cursor`` points to the first item in ``Buffer`` for the
30+ -- next character to read (or to the closing single/double quote if the
31+ -- character read was the last one) and ``Result`` is set to the character
32+ -- that was just read, or to an error message if reading one character was
33+ -- unsuccessful.
2634
2735 -- -------------------------
2836 -- Common_Denoted_String --
2937 -- -------------------------
3038
31- function Common_Denoted_String (Node : Bare_Lkt_Node) return String_Type is
39+ function Common_Denoted_String
40+ (Node : Bare_Lkt_Node) return Internal_Decoded_String_Value
41+ is
42+ Tab_Stop : constant Positive := Node.Unit.Context.Tab_Stop;
43+
3244 N_Text : constant Text_Type := Text (Node);
3345 pragma Assert (N_Text (N_Text'Last) = ' "' );
3446
35- Cursor : Natural := N_Text'First + 1 ;
47+ Cursor : Natural := N_Text'First + 1 ;
48+ Cursor_Sloc : Source_Location := Start_Sloc (Sloc_Range (Node));
3649
3750 Result : Text_Type (1 .. N_Text'Length);
3851 Result_Last : Natural := Result'First - 1 ;
52+ Char_Value : Internal_Decoded_Char_Value;
3953 begin
4054 -- Make sure that the slice starts at the first denoted character in the
4155 -- presence of string literal prefix.
@@ -45,29 +59,53 @@ package body Liblktlang.Implementation.Extensions is
4559 Cursor := Cursor + 1 ;
4660 end if ;
4761
62+ -- Update Cursor_Sloc so that it reflects the location of N_Text
63+ -- (Cursor).
64+
65+ Cursor_Sloc.Column :=
66+ Cursor_Sloc.Column
67+ + Column_Count (N_Text (N_Text'First .. Cursor), Tab_Stop);
68+
4869 while Cursor /= N_Text'Last loop
4970 Result_Last := Result_Last + 1 ;
50- Read_Denoted_Char (N_Text, Cursor, Result (Result_Last));
71+ Read_Denoted_Char (N_Text, False, Cursor, Cursor_Sloc, Char_Value);
72+ if Char_Value.Has_Error then
73+ return
74+ (Value => Empty_String,
75+ Has_Error => True,
76+ Error_Sloc => Char_Value.Error_Sloc,
77+ Error_Message => Char_Value.Error_Message);
78+ end if ;
79+ Result (Result_Last) := Char_Value.Value;
5180 end loop ;
5281
53- return Create_String (Result (Result'First .. Result_Last));
82+ return
83+ (Value => Create_String (Result (Result'First .. Result_Last)),
84+ Has_Error => False,
85+ Error_Sloc => No_Source_Location,
86+ Error_Message => Empty_String);
5487 end Common_Denoted_String ;
5588
5689 -- ---------------------
5790 -- Read_Denoted_Char --
5891 -- ---------------------
5992
6093 procedure Read_Denoted_Char
61- (Buffer : Text_Type;
62- Cursor : in out Positive;
63- Result : out Character_Type)
64- is
65- -- Note that, since buffer comes from a successfully lexed character,
66- -- string or pattern literal token, it is supposed to be well-formed:
67- -- "when other" clauses in the code below are thus dead code.
94+ (Buffer : Text_Type;
95+ For_Char_Lit : Boolean;
96+ Cursor : in out Positive;
97+ Cursor_Sloc : in out Source_Location;
98+ Result : out Internal_Decoded_Char_Value) is
6899 begin
100+ Result :=
101+ (Value => ' ' ,
102+ Has_Error => False,
103+ Error_Sloc => Cursor_Sloc,
104+ Error_Message => Empty_String);
105+
69106 if Buffer (Cursor) = ' \' then
70107 Cursor := Cursor + 1 ;
108+ Cursor_Sloc.Column := Cursor_Sloc.Column + 1 ;
71109 declare
72110 function Read_Digits (N : Positive) return Character_Type;
73111 -- Read N hexadecimal digits (encoding a codepoint number) and
@@ -90,7 +128,10 @@ package body Liblktlang.Implementation.Extensions is
90128 Digit_Value : Unsigned_32;
91129 begin
92130 for I in 1 .. N loop
93- Digit_Char := Buffer (Cursor + I);
131+ Cursor := Cursor + 1 ;
132+ Cursor_Sloc.Column := Cursor_Sloc.Column + 1 ;
133+
134+ Digit_Char := Buffer (Cursor);
94135 case Digit_Char is
95136 when ' 0' .. ' 9' =>
96137 Digit_Value :=
@@ -105,14 +146,19 @@ package body Liblktlang.Implementation.Extensions is
105146 Character_Type'Pos (Digit_Char)
106147 - Character_Type'Pos (' A' ) + 10 ;
107148 when others =>
108- raise Program_Error;
149+ Result.Has_Error := True;
150+ Result.Error_Message :=
151+ Create_String (" invalid escape sequence" );
152+ return ' ' ;
109153 end case ;
110154 Codepoint := 16 * Codepoint + Digit_Value;
111155 end loop ;
112156
113- -- Move past the escape sequence prefix and the digits
157+ -- Move past the last digit of the escape sequence
158+
159+ Cursor := Cursor + 1 ;
160+ Cursor_Sloc.Column := Cursor_Sloc.Column + 1 ;
114161
115- Cursor := Cursor + 1 + N;
116162 return Character_Type'Val (Codepoint);
117163 end Read_Digits ;
118164
@@ -124,36 +170,73 @@ package body Liblktlang.Implementation.Extensions is
124170 (Codepoint : Character) return Character_Type is
125171 begin
126172 Cursor := Cursor + 1 ;
173+ Cursor_Sloc.Column := Cursor_Sloc.Column + 1 ;
127174 return Character_Type'Val (Character'Pos (Codepoint));
128175 end Short_Escape_Sequence ;
129176
130177 begin
131- Result :=
132- (case Buffer (Cursor) is
178+ case Buffer (Cursor) is
133179
134180 -- Escape sequences for codepoint numbers
135181
136- when ' x' => Read_Digits (2 ),
137- when ' u' => Read_Digits (4 ),
138- when ' U' => Read_Digits (8 ),
182+ when ' x' =>
183+ Result.Value := Read_Digits (2 );
184+ when ' u' =>
185+ Result.Value := Read_Digits (4 );
186+ when ' U' =>
187+ Result.Value := Read_Digits (8 );
139188
140189 -- Short escape sequences
141190
142- when ' 0' => Short_Escape_Sequence (ASCII.NUL),
143- when ' a' => Short_Escape_Sequence (ASCII.BEL),
144- when ' b' => Short_Escape_Sequence (ASCII.BS),
145- when ' t' => Short_Escape_Sequence (ASCII.HT),
146- when ' n' => Short_Escape_Sequence (ASCII.LF),
147- when ' v' => Short_Escape_Sequence (ASCII.VT),
148- when ' f' => Short_Escape_Sequence (ASCII.FF),
149- when ' r' => Short_Escape_Sequence (ASCII.CR),
150- when ' \' => Short_Escape_Sequence (' \' ),
151- when ' "' => Short_Escape_Sequence (' "' ),
152-
153- when others => raise Program_Error);
191+ when ' 0' =>
192+ Result.Value := Short_Escape_Sequence (ASCII.NUL);
193+ when ' a' =>
194+ Result.Value := Short_Escape_Sequence (ASCII.BEL);
195+ when ' b' =>
196+ Result.Value := Short_Escape_Sequence (ASCII.BS);
197+ when ' t' =>
198+ Result.Value := Short_Escape_Sequence (ASCII.HT);
199+ when ' n' =>
200+ Result.Value := Short_Escape_Sequence (ASCII.LF);
201+ when ' v' =>
202+ Result.Value := Short_Escape_Sequence (ASCII.VT);
203+ when ' f' =>
204+ Result.Value := Short_Escape_Sequence (ASCII.FF);
205+ when ' r' =>
206+ Result.Value := Short_Escape_Sequence (ASCII.CR);
207+ when ' \' =>
208+ Result.Value := Short_Escape_Sequence (' \' );
209+ when ' "' =>
210+ if For_Char_Lit then
211+ Result.Has_Error := True;
212+ Result.Error_Message :=
213+ Create_String (" invalid escape sequence" );
214+ else
215+ Result.Value := Short_Escape_Sequence (' "' );
216+ end if ;
217+ when ' '' =>
218+ if For_Char_Lit then
219+ Result.Value := Short_Escape_Sequence (' '' );
220+ else
221+ Result.Has_Error := True;
222+ Result.Error_Message :=
223+ Create_String (" invalid escape sequence" );
224+ end if ;
225+
226+ when others =>
227+ Result.Has_Error := True;
228+ Result.Error_Message :=
229+ Create_String (" invalid escape sequence" );
230+ end case ;
231+
232+ if Result.Has_Error then
233+ return ;
234+ end if ;
154235 end ;
155236 else
156- Result := Buffer (Cursor);
237+ Result.Value := Buffer (Cursor);
238+ Cursor_Sloc.Column :=
239+ Cursor_Sloc.Column + Column_Count (Buffer (Cursor .. Cursor));
157240 Cursor := Cursor + 1 ;
158241 end if ;
159242 end Read_Denoted_Char ;
@@ -331,17 +414,32 @@ package body Liblktlang.Implementation.Extensions is
331414 -- ----------------------------
332415
333416 function Char_Lit_P_Denoted_Value
334- (Node : Bare_Char_Lit) return Character_Type
417+ (Node : Bare_Char_Lit) return Internal_Decoded_Char_Value
335418 is
336419 N_Text : constant Text_Type := Text (Node);
337420 pragma Assert (N_Text (N_Text'First) = ' '' );
338421 pragma Assert (N_Text (N_Text'Last) = ' '' );
339422
340- Cursor : Positive := N_Text'First + 1 ;
341- Result : Character_Type;
423+ Cursor : Positive := N_Text'First + 1 ;
424+ Cursor_Sloc : Source_Location := Start_Sloc (Sloc_Range (Node));
425+ Result : Internal_Decoded_Char_Value;
342426 begin
343- Read_Denoted_Char (N_Text, Cursor, Result);
344- pragma Assert (Cursor = N_Text'Last);
427+ -- Before reading the denoted character, update Cursor_Sloc so that it
428+ -- corresponds to the character right after the opening single quote.
429+
430+ Cursor_Sloc.Column := Cursor_Sloc.Column + 1 ;
431+ Read_Denoted_Char (N_Text, True, Cursor, Cursor_Sloc, Result);
432+
433+ -- Ensure that reading one character has moved the cursor to the closing
434+ -- quote. If it is not the case, there are too many characters in this
435+ -- literal.
436+
437+ if not Result.Has_Error and then Cursor /= N_Text'Last then
438+ Result.Has_Error := True;
439+ Result.Error_Sloc := Cursor_Sloc;
440+ Result.Error_Message :=
441+ Create_String (" exactly one character expected" );
442+ end if ;
345443 return Result;
346444 end Char_Lit_P_Denoted_Value ;
347445
@@ -350,7 +448,7 @@ package body Liblktlang.Implementation.Extensions is
350448 -- ------------------------------
351449
352450 function String_Lit_P_Denoted_Value
353- (Node : Bare_String_Lit) return String_Type is
451+ (Node : Bare_String_Lit) return Internal_Decoded_String_Value is
354452 begin
355453 return Common_Denoted_String (Node);
356454 end String_Lit_P_Denoted_Value ;
@@ -360,7 +458,7 @@ package body Liblktlang.Implementation.Extensions is
360458 -- -----------------------------
361459
362460 function Token_Lit_P_Denoted_Value
363- (Node : Bare_Token_Lit) return String_Type is
461+ (Node : Bare_Token_Lit) return Internal_Decoded_String_Value is
364462 begin
365463 return Common_Denoted_String (Node);
366464 end Token_Lit_P_Denoted_Value ;
@@ -370,7 +468,7 @@ package body Liblktlang.Implementation.Extensions is
370468 -- -------------------------------------
371469
372470 function Token_Pattern_Lit_P_Denoted_Value
373- (Node : Bare_Token_Pattern_Lit) return String_Type is
471+ (Node : Bare_Token_Pattern_Lit) return Internal_Decoded_String_Value is
374472 begin
375473 return Common_Denoted_String (Node);
376474 end Token_Pattern_Lit_P_Denoted_Value ;
0 commit comments