Skip to content

Commit 9b9fdb1

Browse files
Add src_rewriting module + edit test quotation
1 parent aabf542 commit 9b9fdb1

File tree

6 files changed

+112
-77
lines changed

6 files changed

+112
-77
lines changed

src/lsp/cobol_preproc/src_lexing.ml

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,6 @@ open Src_format
2020

2121
(* --- *)
2222

23-
let expand_tabs ?(tab_stop=8) ?(starting_col=0) src =
24-
match String.index_opt src '\t' with
25-
| None -> src
26-
| Some _ ->
27-
let buf = Buffer.create (String.length src) in
28-
let col = ref starting_col in
29-
let spaces = String.make tab_stop ' ' in
30-
String.iter (function
31-
| '\t' ->
32-
let n = (tab_stop - !col mod tab_stop) in
33-
Buffer.add_substring buf spaces 0 n;
34-
col := !col + 1
35-
| ('\n' | '\r') as c ->
36-
Buffer.add_char buf c;
37-
col := 0;
38-
| c ->
39-
Buffer.add_char buf c;
40-
incr col)
41-
src;
42-
Buffer.contents buf
43-
4423
let remove_blanks = Str.global_replace (Str.regexp " ") "" (* '\t'? *)
4524

4625
(* --- *)
@@ -696,3 +675,4 @@ let separator' ~char ~k = free_text (free_separator ~char) ~k
696675
let alphanum_lit' ~k = free_text free_alphanum_lit ~k
697676

698677
(* --- *)
678+

src/lsp/cobol_preproc/src_lexing.mli

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
(* *)
1212
(**************************************************************************)
1313

14-
val expand_tabs: ?tab_stop:int -> ?starting_col:int -> string -> string
15-
1614
(** -- *)
1715

1816
type 'k state
@@ -103,3 +101,4 @@ val unexpected
103101
-> ?severity: [`Error | `Warn]
104102
-> k: ('k state -> Lexing.lexbuf -> 'b)
105103
-> 'k state -> Lexing.lexbuf -> 'b
104+

src/lsp/cobol_preproc/src_reader.ml

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -246,53 +246,12 @@ let make make_lexing ?filename ~source_format input =
246246
Option.iter (Lexing.set_filename lexbuf) filename;
247247
Plx (Src_lexing.init_state source_format, lexbuf)
248248

249-
let from_channel_expanding_tabs ?with_positions ?(tab_stop = 8) ic : Lexing.lexbuf =
250-
let read_buf = Bytes.create 4096 in
251-
let read_pos = ref 0 in (* current position in read_buf *)
252-
let read_len = ref 0 in (* valid bytes in read_buf *)
253-
let col = ref 0 in (* current column (0-based) *)
254-
let refill buf len =
255-
let written = ref 0 in
256-
let rec loop () =
257-
if !written >= len then () (* lexer buffer full *)
258-
else begin
259-
(* Refill read_buf if exhausted *)
260-
if !read_pos >= !read_len then begin
261-
let n = input ic read_buf 0 (Bytes.length read_buf) in
262-
if n = 0 then () (* EOF *)
263-
else begin read_pos := 0; read_len := n; loop () end
264-
end else
265-
let c = Bytes.get read_buf !read_pos in
266-
if c = '\t' then begin
267-
let spaces = tab_stop - (!col mod tab_stop) in
268-
let n = min spaces (len - !written) in
269-
Bytes.fill buf !written n ' ';
270-
written := !written + n;
271-
col := !col + n;
272-
if n = spaces then (* fully expanded this tab *)
273-
incr read_pos
274-
(* else: partially expanded; we'll resume next refill *)
275-
end else begin
276-
Bytes.set buf !written c;
277-
incr written;
278-
incr read_pos;
279-
if c = '\n' then col := 0
280-
else col := !col + 1
281-
end;
282-
loop ()
283-
end
284-
in
285-
loop ();
286-
!written
287-
in
288-
Lexing.from_function ?with_positions refill
289-
290249
(* --- *)
291250

292251
let from_string = make Lexing.from_string
293252
let from_channel = make Lexing.from_channel
294253
let from_channel_no_tabs ?(tab_stop=8) =
295-
make (from_channel_expanding_tabs ~tab_stop)
254+
make (Src_rewriting.from_channel_expanding_tabs ~tab_stop)
296255

297256

298257
let fill buff ~lookup_len (input: Src_input.t) =
@@ -320,7 +279,7 @@ let from ?source_format (input: Src_input.t) =
320279
let source_format, input = start_reading input ?source_format in
321280
match input with
322281
| String { contents; filename } ->
323-
from_string ~source_format ~filename (Src_lexing.expand_tabs contents)
282+
from_string ~source_format ~filename (Src_rewriting.expand_tabs contents)
324283
| Channel { ic; filename } ->
325284
from_channel_no_tabs ~source_format ~filename ic
326285

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
(**************************************************************************)
2+
(* *)
3+
(* SuperBOL OSS Studio *)
4+
(* *)
5+
(* Copyright (c) 2026 OCamlPro SAS *)
6+
(* *)
7+
(* All rights reserved. *)
8+
(* This source code is licensed under the GNU Affero General Public *)
9+
(* License version 3 found in the LICENSE.md file in the root directory *)
10+
(* of this source tree. *)
11+
(* *)
12+
(**************************************************************************)
13+
14+
let expand_tabs ?(tab_stop=8) ?(starting_col=0) src =
15+
match String.index_opt src '\t' with
16+
| None -> src
17+
| Some _ ->
18+
let buf = Buffer.create (String.length src) in
19+
let col = ref starting_col in
20+
let spaces = String.make tab_stop ' ' in
21+
String.iter (function
22+
| '\t' ->
23+
let n = (tab_stop - !col mod tab_stop) in
24+
Buffer.add_substring buf spaces 0 n;
25+
col := !col + 1
26+
| ('\n' | '\r') as c ->
27+
Buffer.add_char buf c;
28+
col := 0;
29+
| c ->
30+
Buffer.add_char buf c;
31+
incr col)
32+
src;
33+
Buffer.contents buf
34+
35+
let from_channel_expanding_tabs ?with_positions ?(tab_stop = 8) ic : Lexing.lexbuf =
36+
let read_buf = Bytes.create 4096 in
37+
let read_pos = ref 0 in (* current position in read_buf *)
38+
let read_len = ref 0 in (* valid bytes in read_buf *)
39+
let col = ref 0 in (* current column (0-based) *)
40+
let refill buf len =
41+
let written = ref 0 in
42+
let rec loop () =
43+
if !written >= len then () (* lexer buffer full *)
44+
else begin
45+
(* Refill read_buf if exhausted *)
46+
if !read_pos >= !read_len then begin
47+
let n = input ic read_buf 0 (Bytes.length read_buf) in
48+
if n = 0 then () (* EOF *)
49+
else begin read_pos := 0; read_len := n; loop () end
50+
end else
51+
let c = Bytes.get read_buf !read_pos in
52+
if c = '\t' then begin
53+
let spaces = tab_stop - (!col mod tab_stop) in
54+
let n = min spaces (len - !written) in
55+
Bytes.fill buf !written n ' ';
56+
written := !written + n;
57+
col := !col + n;
58+
if n = spaces then (* fully expanded this tab *)
59+
incr read_pos
60+
(* else: partially expanded; we'll resume next refill *)
61+
end else begin
62+
Bytes.set buf !written c;
63+
incr written;
64+
incr read_pos;
65+
if c = '\n' then col := 0
66+
else col := !col + 1
67+
end;
68+
loop ()
69+
end
70+
in
71+
loop ();
72+
!written
73+
in
74+
Lexing.from_function ?with_positions refill
75+
76+
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
(**************************************************************************)
2+
(* *)
3+
(* SuperBOL OSS Studio *)
4+
(* *)
5+
(* Copyright (c) 2026 OCamlPro SAS *)
6+
(* *)
7+
(* All rights reserved. *)
8+
(* This source code is licensed under the GNU Affero General Public *)
9+
(* License version 3 found in the LICENSE.md file in the root directory *)
10+
(* of this source tree. *)
11+
(* *)
12+
(**************************************************************************)
13+
14+
val expand_tabs: ?tab_stop:int -> ?starting_col:int -> string -> string
15+
16+
val from_channel_expanding_tabs:
17+
?with_positions:bool ->
18+
?tab_stop:int ->
19+
in_channel ->
20+
Lexing.lexbuf
21+

test/cobol_parsing/tokens.ml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -150,21 +150,21 @@ let%expect_test "token-locations-with-missing-program-id" =
150150
let%expect_test "tokens-with-tabs" =
151151
Parser_testing.show_parsed_tokens ~source_format:(SF SFFixed)
152152
~parser_options:(Parser_testing.options ~verbose:true ())
153-
{|
153+
"
154154
IDENTIFICATION DIVISION.
155155
PROGRAM-ID. prog.
156156
PROCEDURE DIVISION.
157-
STRING W-AGT ";" W-RUBNUM (J) ";"
158-
W-RENVOINOTE W-DEST-NOM ";"
159-
W-DEST-RUE1 ";" W-DEST-RUE2 ";"
160-
W-DEST-CP ";" W-DEST-VILLE
161-
";" W-DEST-TEL1 (1:2) " "
162-
W-DEST-TEL1 (3:2) " "
163-
W-DEST-TEL1 (5:2) " "
164-
W-DEST-TEL1 (7:2) " "
157+
\t\tSTRING \tW-AGT \";\" W-RUBNUM (J) \";\"
158+
\t\tW-RENVOINOTE W-DEST-NOM \";\"
159+
\t\tW-DEST-RUE1 \";\" W-DEST-RUE2 \";\"
160+
\t\tW-DEST-CP \";\" W-DEST-VILLE
161+
\t\t\";\" W-DEST-TEL1 (1:2) \" \"
162+
W-DEST-TEL1 (3:2) \" \"
163+
W-DEST-TEL1 (5:2) \" \"
164+
W-DEST-TEL1 (7:2) \" \"
165165
W-DEST-TEL1 (9:2)
166-
delimited by " " into LARTISAN.
167-
|};
166+
delimited by \" \" into LARTISAN.
167+
";
168168
[%expect {|
169169
Tks: IDENTIFICATION, DIVISION, .
170170
Tks: PROGRAM-ID, ., INFO_WORD[prog], .

0 commit comments

Comments
 (0)