Skip to content

Commit 0a18f4b

Browse files
Save
1 parent 8886ebe commit 0a18f4b

File tree

5 files changed

+107
-14
lines changed

5 files changed

+107
-14
lines changed

src/lsp/cobol_preproc/src_lexer.mll

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -174,24 +174,25 @@
174174
let tab = '\t'
175175
let newline = '\r'* '\n'
176176
let nnl = _ # ['\r' '\n'] (* anything but newline *)
177-
let sna = nnl nnl nnl nnl nnl nnl (* 6 chars; TODO: exclude tabs *)
177+
let nnl' = _ # ['\t' '\r' '\n'] (* anything but newline or tab *)
178+
let sna = nnl' nnl' nnl' nnl' nnl' nnl' (* 6 chars *)
178179
let tabs =
179180
(tab |
180-
nnl tab |
181-
nnl nnl tab |
182-
nnl nnl nnl tab |
183-
nnl nnl nnl nnl tab |
184-
nnl nnl nnl nnl nnl tab |
185-
nnl nnl nnl nnl nnl nnl tab |
186-
nnl nnl nnl nnl nnl nnl nnl tab)
181+
nnl' tab |
182+
nnl' nnl' tab |
183+
nnl' nnl' nnl' tab |
184+
nnl' nnl' nnl' nnl' tab |
185+
nnl' nnl' nnl' nnl' nnl' tab |
186+
nnl' nnl' nnl' nnl' nnl' nnl' tab |
187+
nnl' nnl' nnl' nnl' nnl' nnl' nnl' tab)
187188
let spaces = ([' ' '\t']*)
188189
let blank = [' ' '\009' '\r']
189-
let nonblank = nnl # blank
190+
let nonblank = nnl' # blank
190191
let blanks =(blank | '\t')+
191-
let blank_area_A = blank blank blank blanks | '\t'
192-
let nonblank_area_A =(nonblank nnl nnl nnl |
193-
blank nonblank nnl nnl |
194-
blank blank nonblank nnl |
192+
let blank_area_A = blank blank blank blanks
193+
let nonblank_area_A =(nonblank nnl' nnl' nnl' |
194+
blank nonblank nnl' nnl' |
195+
blank blank nonblank nnl' |
195196
blank blank blank nonblank)
196197
let nonblank = nonblank # ['\t'] (* now, also exclude tab from blank chars *)
197198
let separator = [ ',' ';' ]

src/lsp/cobol_preproc/src_lexing.ml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,27 @@ open Src_format
2020

2121
(* --- *)
2222

23+
let expand_tabs ?(tab_stop=8) ?(starting_col=0) src =
24+
match String.index_opt src '\t' with
25+
| None -> src
26+
| Some _ ->
27+
let buf = Buffer.create (String.length src) in
28+
let col = ref starting_col in
29+
let spaces = String.make tab_stop ' ' in
30+
String.iter (function
31+
| '\t' ->
32+
let n = (tab_stop - !col mod tab_stop) in
33+
Buffer.add_substring buf spaces 0 n;
34+
col := !col + 1
35+
| ('\n' | '\r') as c ->
36+
Buffer.add_char buf c;
37+
col := 0;
38+
| c ->
39+
Buffer.add_char buf c;
40+
incr col)
41+
src;
42+
Buffer.contents buf
43+
2344
let remove_blanks = Str.global_replace (Str.regexp " ") "" (* '\t'? *)
2445

2546
(* --- *)

src/lsp/cobol_preproc/src_lexing.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
(* *)
1212
(**************************************************************************)
1313

14+
val expand_tabs: ?tab_stop:int -> ?starting_col:int -> string -> string
15+
16+
(** -- *)
17+
1418
type 'k state
1519

1620
val init_state: 'k Src_format.source_format -> 'k state

src/lsp/cobol_preproc/src_reader.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ let from ?source_format (input: Src_input.t) =
272272
let source_format, input = start_reading input ?source_format in
273273
match input with
274274
| String { contents; filename } ->
275-
from_string ~source_format ~filename contents
275+
from_string ~source_format ~filename (Src_lexing.expand_tabs contents)
276276
| Channel { ic; filename } ->
277277
from_channel ~source_format ~filename ic
278278

@@ -285,6 +285,7 @@ let restart make_lexing make_input ?source_format ?position
285285
input (Plx (s, prev_lexbuf)) =
286286
match position with
287287
| Some position when position.Lexing.pos_cnum > 0 ->
288+
let start_col = position.pos_cnum - position.pos_bol in
288289
let lexbuf = make_lexing ?with_positions:(Some true) input in
289290
Lexing.set_position lexbuf position;
290291
Lexing.set_filename lexbuf position.Lexing.pos_fname; (* useful? *)

test/cobol_parsing/tokens.ml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,69 @@ let%expect_test "token-locations-with-missing-program-id" =
146146
WORD[x]@<prog.cob:11-20|11-21>
147147
.@<prog.cob:11-21|11-22>
148148
EOF@<prog.cob:11-22|11-22> |}];;
149+
150+
let%expect_test "tokens-with-tabs" =
151+
Parser_testing.show_parsed_tokens ~source_format:(SF SFFixed)
152+
~parser_options:(Parser_testing.options ~verbose:true ())
153+
{|
154+
IDENTIFICATION DIVISION.
155+
PROGRAM-ID. prog.
156+
PROCEDURE DIVISION.
157+
STRING W-AGT ";" W-RUBNUM (J) ";"
158+
W-RENVOINOTE W-DEST-NOM ";"
159+
W-DEST-RUE1 ";" W-DEST-RUE2 ";"
160+
W-DEST-CP ";" W-DEST-VILLE
161+
";" W-DEST-TEL1 (1:2) " "
162+
W-DEST-TEL1 (3:2) " "
163+
W-DEST-TEL1 (5:2) " "
164+
W-DEST-TEL1 (7:2) " "
165+
W-DEST-TEL1 (9:2)
166+
delimited by " " into LARTISAN.
167+
|};
168+
[%expect {|
169+
Tks: PROCEDURE, DIVISION, .
170+
Tks':
171+
Tks: WORD[para-1], .
172+
Tks: IF
173+
Tks: WORD[X], >, DIGITS[9]
174+
Tks: THEN, IF
175+
Tks: WORD[X], >, DIGITS[6]
176+
Tks: THEN, DISPLAY, "2"
177+
Incoming: {UNDERLINE, REVERSE-VIDEO, LOWLIGHT, HIGHLIGHT, FOREGROUND-COLOR,
178+
ERASE, BLINK, BELL, BACKGROUND-COLOR}
179+
Tks': "2"
180+
Tks: ELSE, MOVE, DIGITS[1], TO
181+
Outgoing: {UNDERLINE, REVERSE-VIDEO, LOWLIGHT, HIGHLIGHT, FOREGROUND-COLOR,
182+
ERASE, BLINK, BELL, BACKGROUND-COLOR}
183+
Tks: WORD[x]
184+
Tks: ELSE, MOVE, DIGITS[1], TO, WORD[x], ., EOF
185+
Tks':
186+
PROCEDURE@<prog.cob:1-0|1-9>
187+
DIVISION@<prog.cob:1-10|1-18>
188+
.@<prog.cob:1-18|1-19>
189+
WORD[para-1]@<prog.cob:2-4|2-10>
190+
.@<prog.cob:2-10|2-11>
191+
IF@<prog.cob:3-8|3-10>
192+
WORD[X]@<prog.cob:3-11|3-12>
193+
>@<prog.cob:3-12|3-13>
194+
DIGITS[9]@<prog.cob:3-13|3-14>
195+
THEN@<prog.cob:4-8|4-12>
196+
IF@<prog.cob:5-11|5-13>
197+
WORD[X]@<prog.cob:5-14|5-15>
198+
>@<prog.cob:5-15|5-16>
199+
DIGITS[6]@<prog.cob:5-16|5-17>
200+
THEN@<prog.cob:6-11|6-15>
201+
DISPLAY@<prog.cob:7-14|7-21>
202+
"2"@<prog.cob:7-22|7-25>
203+
ELSE@<prog.cob:8-11|8-15>
204+
MOVE@<prog.cob:9-14|9-18>
205+
DIGITS[1]@<prog.cob:9-19|9-20>
206+
TO@<prog.cob:9-21|9-23>
207+
WORD[x]@<prog.cob:9-24|9-25>
208+
ELSE@<prog.cob:10-8|10-12>
209+
MOVE@<prog.cob:11-10|11-14>
210+
DIGITS[1]@<prog.cob:11-15|11-16>
211+
TO@<prog.cob:11-17|11-19>
212+
WORD[x]@<prog.cob:11-20|11-21>
213+
.@<prog.cob:11-21|11-22>
214+
EOF@<prog.cob:11-22|11-22> |}];;

0 commit comments

Comments
 (0)