Skip to content

Commit b9f64a7

Browse files
gpetiotjonludlam
authored andcommitted
Preserve td/th in the datatype
1 parent 7d74abf commit b9f64a7

File tree

6 files changed

+222
-249
lines changed

6 files changed

+222
-249
lines changed

src/ast.ml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ type inline_element =
3030
text. Similarly the [`Link] constructor has the link itself as first parameter
3131
and the second is the replacement text. *)
3232

33-
type 'a cell = 'a with_location list
33+
type 'a cell = 'a with_location list * [ `Header | `Data ]
3434
type 'a row = 'a cell list
3535
type 'a grid = 'a row list
36-
type 'a abstract_table = 'a row * 'a grid * alignment list
36+
type 'a abstract_table = 'a grid * alignment list
3737

3838
type nestable_block_element =
3939
[ `Paragraph of inline_element with_location list

src/lexer.mll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,10 +411,10 @@ rule token input = parse
411411
{ emit input `Begin_table_row }
412412

413413
| "{th"
414-
{ emit input `Begin_table_header }
414+
{ emit input (`Begin_table_cell `Header) }
415415

416416
| "{td"
417-
{ emit input `Begin_table_data }
417+
{ emit input (`Begin_table_cell `Data) }
418418

419419
| '{' (['0'-'9']+ as level) ':' (([^ '}'] # space_char)* as label)
420420
{ emit

src/syntax.ml

Lines changed: 26 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -64,54 +64,46 @@ module Table = struct
6464

6565
let valid_align_row lx = List.map valid_align lx |> Option.join_list
6666

67-
let create ~header ~data ~align : Ast.table =
67+
let create ~grid ~align : Ast.table =
6868
let to_block x = Loc.at x.Loc.location (`Paragraph [ x ]) in
69-
let cell_to_block = List.map to_block in
69+
let cell_to_block (x, k) = (List.map to_block x, k) in
7070
let row_to_block = List.map cell_to_block in
7171
let grid_to_block = List.map row_to_block in
72-
((row_to_block header, grid_to_block data, align), `Light)
72+
((grid_to_block grid, align), `Light)
7373

74-
let from_grid (grid : _ Ast.grid) : Ast.table =
74+
let with_kind kind : 'a with_location list list -> 'a Ast.row =
75+
List.map (fun c -> (c, kind))
76+
77+
let from_grid grid : Ast.table =
7578
match grid with
76-
| [] -> create ~header:[] ~data:[] ~align:[]
79+
| [] -> create ~grid:[] ~align:[]
7780
| row1 :: rows2_N -> (
7881
match valid_align_row row1 with
7982
(* If the first line is the align row, everything else is data. *)
80-
| Some align -> create ~header:[] ~data:rows2_N ~align
83+
| Some align ->
84+
create ~grid:(List.map (with_kind `Data) rows2_N) ~align
8185
| None -> (
8286
match rows2_N with
8387
(* Only 1 line, if this is not the align row this is data. *)
84-
| [] -> create ~header:[] ~data:[ row1 ] ~align:[]
88+
| [] -> create ~grid:[ with_kind `Data row1 ] ~align:[]
8589
| row2 :: rows3_N -> (
8690
match valid_align_row row2 with
8791
(* If the second line is the align row, the first one is the
8892
header and the rest is data. *)
89-
| Some align -> create ~header:row1 ~data:rows3_N ~align
93+
| Some align ->
94+
let header = with_kind `Header row1 in
95+
let data = List.map (with_kind `Data) rows3_N in
96+
create ~grid:(header :: data) ~align
9097
(* No align row in the first 2 lines, everything is considered
9198
data. *)
92-
| None -> create ~header:[] ~data:grid ~align:[])))
99+
| None ->
100+
create ~grid:(List.map (with_kind `Data) grid) ~align:[]))
101+
)
93102
end
94103

95104
module Heavy_syntax = struct
96-
let create ~header ~data : Ast.table = ((header, data, []), `Heavy)
97-
98-
let valid_header_row row =
99-
List.map (function `Header, x -> Some x | `Data, _ -> None) row
100-
|> Option.join_list
101-
102-
let from_grid grid : Ast.table =
103-
match grid with
104-
| [] -> create ~header:[] ~data:[]
105-
| row1 :: rows2_N ->
106-
let header, data =
107-
(* If the first line is the header row, everything else is data. *)
108-
match valid_header_row row1 with
109-
| Some header -> (header, rows2_N)
110-
(* Otherwise everything is considered data. *)
111-
| None -> ([], grid)
112-
in
113-
let data = List.map (List.map snd) data in
114-
create ~header ~data
105+
let create ~grid : Ast.table = ((grid, []), `Heavy)
106+
let from_grid grid : Ast.table = create ~grid
115107
end
116108
end
117109

@@ -475,7 +467,7 @@ let paragraph : input -> Ast.nestable_block_element with_location =
475467
(* {3 Helper types} *)
476468

477469
(* The interpretation of tokens in the block parser depends on where on a line
478-
each token appears. The seven possible "locations" are:
470+
each token appears. The six possible "locations" are:
479471
480472
- [`At_start_of_line], when only whitespace has been read on the current
481473
line.
@@ -485,8 +477,7 @@ let paragraph : input -> Ast.nestable_block_element with_location =
485477
[-], has been read, and only whitespace has been read since.
486478
- [`After_explicit_list_bullet], when a valid explicit bullet, such as [{li],
487479
has been read, and only whitespace has been read since.
488-
- [`After_table_header], when a table header opening markup ('{th') has been read.
489-
- [`After_table_cell], when a table cell opening markup ('{td') has been read.
480+
- [`After_table_cell], when a table cell opening markup ('{th' or '{td') has been read.
490481
- [`After_text], when any other valid non-whitespace token has already been
491482
read on the current line.
492483
@@ -510,7 +501,6 @@ type where_in_line =
510501
| `After_tag
511502
| `After_shorthand_bullet
512503
| `After_explicit_list_bullet
513-
| `After_table_header
514504
| `After_table_cell
515505
| `After_text ]
516506

@@ -565,7 +555,6 @@ type ('block, 'stops_at_which_tokens) context =
565555
| Top_level : (Ast.block_element, stops_at_delimiters) context
566556
| In_shorthand_list : (Ast.nestable_block_element, stopped_implicitly) context
567557
| In_explicit_list : (Ast.nestable_block_element, stops_at_delimiters) context
568-
| In_table_header : (Ast.nestable_block_element, stops_at_delimiters) context
569558
| In_table_cell : (Ast.nestable_block_element, stops_at_delimiters) context
570559
| In_tag : (Ast.nestable_block_element, Token.t) context
571560

@@ -581,7 +570,6 @@ let accepted_in_all_contexts :
581570
| Top_level -> (block :> Ast.block_element)
582571
| In_shorthand_list -> block
583572
| In_explicit_list -> block
584-
| In_table_header -> block
585573
| In_table_cell -> block
586574
| In_tag -> block
587575

@@ -674,7 +662,6 @@ let rec block_element_list :
674662
| Top_level -> (List.rev acc, next_token, where_in_line)
675663
| In_shorthand_list -> (List.rev acc, next_token, where_in_line)
676664
| In_explicit_list -> (List.rev acc, next_token, where_in_line)
677-
| In_table_header -> (List.rev acc, next_token, where_in_line)
678665
| In_table_cell -> (List.rev acc, next_token, where_in_line)
679666
| In_tag -> (List.rev acc, next_token, where_in_line))
680667
(* Whitespace. This can terminate some kinds of block elements. It is also
@@ -726,8 +713,7 @@ let rec block_element_list :
726713
consume_block_elements ~parsed_a_tag where_in_line acc
727714
(* Table cells ([{th ...}] and [{td ...}]) can never appear directly
728715
in block content. They can only appear inside [{tr ...}]. *)
729-
| { value = (`Begin_table_header | `Begin_table_data) as token; location }
730-
->
716+
| { value = `Begin_table_cell _ as token; location } ->
731717
let suggestion =
732718
Printf.sprintf "move %s into %s." (Token.print token)
733719
(Token.describe `Begin_table_row)
@@ -777,7 +763,6 @@ let rec block_element_list :
777763
if where_in_line = `At_start_of_line then
778764
(List.rev acc, next_token, where_in_line)
779765
else recover_when_not_at_top_level context
780-
| In_table_header -> recover_when_not_at_top_level context
781766
| In_table_cell -> recover_when_not_at_top_level context
782767
| In_tag ->
783768
if where_in_line = `At_start_of_line then
@@ -1028,7 +1013,6 @@ let rec block_element_list :
10281013
(List.rev acc, next_token, where_in_line)
10291014
else recover_when_not_at_top_level context
10301015
| In_explicit_list -> recover_when_not_at_top_level context
1031-
| In_table_header -> recover_when_not_at_top_level context
10321016
| In_table_cell -> recover_when_not_at_top_level context
10331017
| In_tag -> recover_when_not_at_top_level context
10341018
| Top_level ->
@@ -1089,7 +1073,6 @@ let rec block_element_list :
10891073
| Top_level -> `At_start_of_line
10901074
| In_shorthand_list -> `After_shorthand_bullet
10911075
| In_explicit_list -> `After_explicit_list_bullet
1092-
| In_table_header -> `After_table_header
10931076
| In_table_cell -> `After_table_cell
10941077
| In_tag -> `After_tag
10951078
in
@@ -1334,7 +1317,7 @@ and heavy_table ~parent_markup ~parent_markup_location input =
13341317
(Table.Heavy_syntax.from_grid grid, brace_location)
13351318

13361319
(* Consumes a sequence of table cells (starting with '{th ...}' or '{td ... }',
1337-
which are represented by [`Begin_table_header] [`Begin_table_data] tokens).
1320+
which are represented by [`Begin_table_cell] tokens).
13381321
13391322
This function is called immediately after '{tr' ([`Begin_table_row]) is
13401323
read. The only "valid" way to exit is by reading a [`Right_brace] token,
@@ -1343,13 +1326,7 @@ and heavy_table_row ~parent_markup input =
13431326
let rec consume_cell_items acc =
13441327
Reader.until_rbrace input acc >>> fun next_token ->
13451328
match next_token.Loc.value with
1346-
| `Begin_table_header as token ->
1347-
junk input;
1348-
let content, _brace_location =
1349-
heavy_table_header input ~parent_markup:token
1350-
in
1351-
consume_cell_items ((`Header, content) :: acc)
1352-
| `Begin_table_data as token ->
1329+
| `Begin_table_cell kind as token ->
13531330
junk input;
13541331
let content, token_after_list_item, _where_in_line =
13551332
block_element_list In_table_cell ~parent_markup:token input
@@ -1360,7 +1337,7 @@ and heavy_table_row ~parent_markup input =
13601337
Parse_error.not_allowed token_after_list_item.location
13611338
~what:(Token.describe `End) ~in_what:(Token.describe token)
13621339
|> add_warning input);
1363-
consume_cell_items ((`Data, content) :: acc)
1340+
consume_cell_items ((content, kind) :: acc)
13641341
| token ->
13651342
Parse_error.not_allowed next_token.location ~what:(Token.describe token)
13661343
~in_what:(Token.describe parent_markup)
@@ -1370,28 +1347,6 @@ and heavy_table_row ~parent_markup input =
13701347
in
13711348
consume_cell_items []
13721349

1373-
(* Consumes a table header.
1374-
1375-
This function is called immediately after '{th' ([`Begin_table_header]) is
1376-
read. The only "valid" way to exit is by reading a [`Right_brace] token,
1377-
which is consumed. *)
1378-
and heavy_table_header ~parent_markup input =
1379-
let rec consume_items acc =
1380-
Reader.until_rbrace input acc >>> fun next_token ->
1381-
(match acc with
1382-
| _ :: _ ->
1383-
Parse_error.not_allowed next_token.location
1384-
~what:(Token.describe next_token.value)
1385-
~in_what:(Token.describe parent_markup)
1386-
|> add_warning input
1387-
| [] -> ());
1388-
let content, _token_after_list_item, _where_in_line =
1389-
block_element_list In_table_header ~parent_markup input
1390-
in
1391-
consume_items content
1392-
in
1393-
consume_items []
1394-
13951350
(* {2 Entry point} *)
13961351

13971352
let parse warnings tokens =

src/token.ml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ type t =
7272
| (* Table markup. *)
7373
`Begin_table of [ `Light | `Heavy ]
7474
| `Begin_table_row
75-
| `Begin_table_header
76-
| `Begin_table_data
75+
| `Begin_table_cell of [ `Header | `Data ]
7776
| `Minus
7877
| `Plus
7978
| `Bar
@@ -97,8 +96,8 @@ let print : [< t ] -> string = function
9796
let syntax = match syntax with `Light -> "t" | `Heavy -> "table" in
9897
Printf.sprintf "'{%s'" syntax
9998
| `Begin_table_row -> "'{tr'"
100-
| `Begin_table_header -> "'{th'"
101-
| `Begin_table_data -> "'{td'"
99+
| `Begin_table_cell `Header -> "'{th'"
100+
| `Begin_table_cell `Data -> "'{td'"
102101
| `Minus -> "'-'"
103102
| `Plus -> "'+'"
104103
| `Bar -> "'|'"
@@ -158,8 +157,8 @@ let describe : [< t | `Comment ] -> string = function
158157
| `Begin_table `Light -> "'{t ...}' (table)"
159158
| `Begin_table `Heavy -> "'{table ...}' (table)"
160159
| `Begin_table_row -> "'{tr ...}' (table row)"
161-
| `Begin_table_header -> "'{th ... }' (table header cell)"
162-
| `Begin_table_data -> "'{td ... }' (table data cell)"
160+
| `Begin_table_cell `Header -> "'{th ... }' (table header cell)"
161+
| `Begin_table_cell `Data -> "'{td ... }' (table data cell)"
163162
| `Minus -> "'-' (bulleted list item)"
164163
| `Plus -> "'+' (numbered list item)"
165164
| `Bar -> "'|'"

test/test.ml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,17 @@ module Ast_to_sexp = struct
8585
|> fun items -> List items
8686
in
8787
List [ Atom kind; Atom weight; items ]
88-
| `Table ((header, data, align), s) ->
88+
| `Table ((data, align), s) ->
8989
let syntax = function `Light -> "light" | `Heavy -> "heavy" in
90+
let kind = function `Header -> "header" | `Data -> "data" in
9091
let map name x f = List [ Atom name; List (List.map f x) ] in
9192
List
9293
[
9394
Atom "table";
9495
List [ Atom "syntax"; Atom (syntax s) ];
95-
( map "header" header @@ fun cell ->
96-
map "cell" cell @@ at.at (nestable_block_element at) );
9796
( map "data" data @@ fun row ->
98-
map "row" row @@ fun cell ->
99-
map "cell" cell @@ at.at (nestable_block_element at) );
97+
map "row" row @@ fun (cell, k) ->
98+
map (kind k) cell @@ at.at (nestable_block_element at) );
10099
map "align" align @@ alignment;
101100
]
102101

0 commit comments

Comments
 (0)