1515-- of the license. --
1616-- ----------------------------------------------------------------------------
1717
18- with Ada.Exceptions ; use Ada.Exceptions;
19- with Ada.Characters.Handling ;
2018with GNAT.Strings ; use GNAT.Strings;
21- with GNATCOLL.Traces ; use GNATCOLL.Traces;
2219
20+ with GNATCOLL.Traces ; use GNATCOLL.Traces;
2321with GNATCOLL.VFS ; use GNATCOLL.VFS;
24- with GNATCOLL.Iconv ; use GNATCOLL.Iconv;
22+
2523with VSS.Strings ; use VSS.Strings;
24+ pragma Warnings
25+ (Off, " unit "" VSS.Strings.Character_Iterators"" is not referenced" );
26+ -- GNAT 20220919 report this package as unused, however it is necessary to
27+ -- make visible full declaration of Character_Iterator.
28+ with VSS.Strings.Character_Iterators ;
29+ with VSS.Strings.Converters.Decoders ;
2630with VSS.Strings.Conversions ;
31+
2732with LSP.Ada_Documents ; use LSP.Ada_Documents;
2833with Libadalang.Preprocessing ; use Libadalang.Preprocessing;
2934with Langkit_Support.File_Readers ; use Langkit_Support.File_Readers;
35+ with Langkit_Support.Slocs ;
36+ with Langkit_Support.Text ;
3037
3138package body LSP.Ada_Handlers.File_Readers is
3239
33- Me : constant Trace_Handle := Create (" ALS.FILE_READERS" );
34-
35- function Read_And_Convert_To_UTF8
36- (Filename : String; Charset : String)
37- return GNAT.Strings.String_Access;
38- -- Read the file content from Filename and convert it from the original
39- -- Charset to UTF-8.
40+ use all type VSS.Strings.Converters.Converter_Flag;
4041
41- -- ----------------------------
42- -- Read_And_Convert_To_UTF8 --
43- -- ----------------------------
42+ Me : constant Trace_Handle := Create (" ALS.FILE_READERS" );
4443
45- function Read_And_Convert_To_UTF8
46- (Filename : String; Charset : String)
47- return GNAT.Strings.String_Access
44+ procedure Read_And_Decode
45+ (Filename : String;
46+ Charset : VSS.Strings.Virtual_String;
47+ Decoded : out VSS.Strings.Virtual_String;
48+ Error : out VSS.Strings.Virtual_String);
49+ -- Read the file content from Filename and decode it from the original
50+ -- Charset.
51+
52+ Decoder_Flags : constant VSS.Strings.Converters.Converter_Flags :=
53+ (Stateless => True,
54+ -- Data is decoded as single chunk, don't save state but report error
55+ -- for incomplete byte sequences at the end of data
56+ Stop_On_Error => False,
57+ -- Errors should be reported but not to stop decoding of the following
58+ -- data
59+ Process_BOM => True);
60+ -- Byte-Order-Mark at the beginning of the data should be ignored if
61+ -- present
62+ -- Default flags for the text decoder.
63+
64+ -- -------------------
65+ -- Read_And_Decode --
66+ -- -------------------
67+
68+ procedure Read_And_Decode
69+ (Filename : String;
70+ Charset : VSS.Strings.Virtual_String;
71+ Decoded : out VSS.Strings.Virtual_String;
72+ Error : out VSS.Strings.Virtual_String)
4873 is
49- Raw : GNAT.Strings.String_Access;
50- Decoded : GNAT.Strings.String_Access;
74+ Raw : GNAT.Strings.String_Access;
75+ Decoder : VSS.Strings.Converters.Decoders.Virtual_String_Decoder;
76+
5177 begin
5278 -- Read the file (this call uses MMAP)
79+
5380 Raw := Create_From_UTF8 (Filename).Read_File;
5481
5582 if Raw = null then
56- return null ;
83+ Decoded.Clear;
84+ Error := " Unable to read file" ;
85+
86+ return ;
5787 end if ;
5888
59- -- Convert the file if it's not already encoded in utf-8
89+ Decoder.Initialize (Charset, Decoder_Flags);
6090
61- if Ada.Characters.Handling.To_Lower (Charset) = " utf-8" then
62- Decoded := Raw;
63- else
64- declare
65- State : constant Iconv_T := Iconv_Open (UTF8, Charset);
66- Outbuf : Byte_Sequence (1 .. 4096 );
67- Input_Index : Positive := Raw'First;
68- Conv_Result : Iconv_Result := Full_Buffer;
69- Output_Index : Positive;
70- begin
71- while Conv_Result = Full_Buffer loop
72- Output_Index := 1 ;
73- Iconv (State => State,
74- Inbuf => Raw.all ,
75- Input_Index => Input_Index,
76- Outbuf => Outbuf,
77- Output_Index => Output_Index,
78- Result => Conv_Result);
79-
80- -- Append the converted contents
81- if Decoded /= null then
82- declare
83- Tmp : GNAT.Strings.String_Access := Decoded;
84- begin
85- Decoded := new String'
86- (Tmp.all & Outbuf (1 .. Output_Index - 1 ));
87- GNAT.Strings.Free (Tmp);
88- end ;
89- else
90- Decoded := new String'(Outbuf (1 .. Output_Index - 1 ));
91- end if ;
92- end loop ;
93-
94- GNAT.Strings.Free (Raw);
95- Iconv_Close (State);
96-
97- case Conv_Result is
98- when Success =>
99- -- The conversion was successful
100- null ;
101- when others =>
102- Me.Trace
103- (" Failed to convert '" & Filename & " ' to UTF-8: "
104- & Conv_Result'Img);
105- return null ;
106- end case ;
107- exception
108- when E : others =>
109-
110- Me.Trace
111- (" Exception caught when reading '" & Filename & " ':"
112- & Exception_Message (E));
113- return null ;
114- end ;
91+ if not Decoder.Is_Valid then
92+ -- Charset is not supported, fallback to "utf-8".
93+
94+ Me.Trace
95+ (" Encoding '"
96+ & VSS.Strings.Conversions.To_UTF_8_String (Charset)
97+ & " ' is not supported by text decoder." );
98+
99+ Decoder.Initialize (" utf-8" , Decoder_Flags);
115100 end if ;
116101
117- -- Convert the string to a Virtual_String for easier handling
102+ pragma Assert (Decoder.Is_Valid);
103+ -- At this point decoder is initialized to decode ether given encoding
104+ -- or fallback encoding "utf-8", which is known to be supported.
118105
119- return Decoded;
120- exception
121- when E : others =>
122- if Decoded /= null then
123- GNAT.Strings.Free (Decoded);
124- end if ;
106+ declare
107+ Encoded : constant Ada.Streams.Stream_Element_Array (1 .. Raw'Length)
108+ with Import, Address => Raw.all 'Address;
125109
126- Me.Trace
127- (" Exception caught when reading '" & Filename & " ':"
128- & Exception_Message (E));
110+ begin
111+ Decoded := Decoder.Decode (Encoded);
112+ Error := Decoder.Error_Message;
113+ end ;
129114
130- return null ;
131- end Read_And_Convert_To_UTF8 ;
115+ GNAT.Strings.Free (Raw) ;
116+ end Read_And_Decode ;
132117
133118 -- --------
134119 -- Read --
@@ -143,65 +128,100 @@ package body LSP.Ada_Handlers.File_Readers is
143128 Diagnostics : in out
144129 Langkit_Support.Diagnostics.Diagnostics_Vectors.Vector)
145130 is
146- Doc : Document_Access;
147- Source : Preprocessed_Source := Preprocessed_Source'
148- (Buffer => null , Last => 0 ) ;
149- Buffer : GNAT.Strings.String_Access;
131+ Doc : Document_Access;
132+ Text : VSS.Strings.Virtual_String;
133+ Error : VSS.Strings.Virtual_String ;
134+
150135 begin
151136 -- First check if the file is an open document
137+
152138 Doc := Self.Handler.Get_Open_Document
153139 (URI => LSP.Types.File_To_URI (Filename),
154140 Force => False);
155141
156142 -- Preprocess the document's contents if open, or the file contents if
157143 -- not.
144+
158145 if Doc /= null then
159- Buffer := new String'
160- (VSS.Strings.Conversions.To_UTF_8_String (Doc.Text));
161- else
162- Buffer := Read_And_Convert_To_UTF8 (Filename, Charset);
146+ Text := Doc.Text;
163147
164- -- Return an empty sring when failing to read the file (i.e: when the
165- -- file has been deleted).
166- if Buffer = null then
167- Buffer := new String'(" " );
148+ else
149+ Read_And_Decode
150+ (Filename => Filename,
151+ Charset => VSS.Strings.Conversions.To_Virtual_String (Charset),
152+ Decoded => Text,
153+ Error => Error);
154+
155+ if not Error.Is_Empty then
156+ Diagnostics.Append
157+ (Langkit_Support.Diagnostics.Diagnostic'
158+ (Langkit_Support.Slocs.No_Source_Location_Range,
159+ VSS.Strings.Conversions.To_Unbounded_Wide_Wide_String
160+ (Error)));
168161 end if ;
169162 end if ;
170163
171164 -- If we have preprocessing data, use LAL's API to preoprocess the file.
172165 -- Otherwise, just decode the contents of the document/file.
173166
174167 if Self.Preprocessing_Data /= No_Preprocessor_Data then
175- Libadalang.Preprocessing.Preprocess
176- (Data => Self.Preprocessing_Data,
177- Filename => Filename,
178- Input => Buffer.all ,
179- Contents => Source,
180- Diagnostics => Diagnostics);
181-
182- if Source.Buffer = null then
183- -- Log the diagnostics when processing has failed
184- for Diag of Diagnostics loop
185- Me.Trace (Langkit_Support.Diagnostics.To_Pretty_String (Diag));
186- end loop ;
187- end if ;
188- end if ;
168+ declare
169+ Buffer : GNAT.Strings.String_Access :=
170+ new String
171+ (1 .. Integer (Text.After_Last_Character.First_UTF8_Offset));
172+ -- Size of the "utf-8" encoded data for text is known, so
173+ -- allocate necessary space and fill it later. Allocation on the
174+ -- stack can't be use here due to potential stack overflow.
175+ Source : Preprocessed_Source := Preprocessed_Source'
176+ (Buffer => null , Last => 0 );
189177
190- -- Decode the preprocessed buffer (or the initial contents when there is
191- -- no preprocessing needed) in utf-8.
192-
193- Decode_Buffer
194- (Buffer => (if Source.Buffer /= null then
195- Source.Buffer (1 .. Source.Last)
196- else
197- Buffer.all ),
198- Charset => " utf-8" ,
199- Read_BOM => Read_BOM,
200- Contents => Contents,
201- Diagnostics => Diagnostics);
202-
203- Free (Source);
204- GNAT.Strings.Free (Buffer);
178+ begin
179+ VSS.Strings.Conversions.Set_UTF_8_String (Text, Buffer.all );
180+
181+ Libadalang.Preprocessing.Preprocess
182+ (Data => Self.Preprocessing_Data,
183+ Filename => Filename,
184+ Input => Buffer.all ,
185+ Contents => Source,
186+ Diagnostics => Diagnostics);
187+
188+ if Source.Buffer = null then
189+ -- Log the diagnostics when processing has failed
190+
191+ for Diag of Diagnostics loop
192+ Me.Trace
193+ (Langkit_Support.Diagnostics.To_Pretty_String (Diag));
194+ end loop ;
195+ end if ;
196+
197+ -- Decode the preprocessed buffer (or the initial contents when
198+ -- there is no preprocessing needed) in utf-8.
199+
200+ Decode_Buffer
201+ (Buffer => (if Source.Buffer /= null then
202+ Source.Buffer (1 .. Source.Last)
203+ else
204+ Buffer.all ),
205+ Charset => " utf-8" ,
206+ Read_BOM => Read_BOM,
207+ Contents => Contents,
208+ Diagnostics => Diagnostics);
209+
210+ Free (Source);
211+ GNAT.Strings.Free (Buffer);
212+ end ;
213+
214+ else
215+ Contents :=
216+ (Buffer =>
217+ new Langkit_Support.Text.Text_Type
218+ (1 .. Natural (Text.Character_Length)),
219+ First => 1 ,
220+ Last => Natural (Text.Character_Length));
221+
222+ VSS.Strings.Conversions.Set_Wide_Wide_String
223+ (Text, Contents.Buffer.all );
224+ end if ;
205225 end Read ;
206226
207227end LSP.Ada_Handlers.File_Readers ;
0 commit comments