Skip to content

Commit d0a28cf

Browse files
Julowgpetiot
andauthored
Refactor handling of comments (#2371)
This rewrite part of the code that parses and formats comments to remove repeated or inconsistent code. One function interprets comments: Cmt.decode. It returns a richer type that allow to implement formatting as it was before. The normalization is changed to use that, no more is_docstring and normalization of indentation is now consistent. This fixes normalization problem for comments parsed as doc and cinaps comments. Improvements are: - Asterisk-prefixed comments are formatted specially. The code was present but was never called. - Commented-out code is no longer wrapped. - Fix a bug where formatted cinaps comments wouldn't pass normalization. Co-authored-by: Guillaume Petiot <[email protected]>
1 parent 5f796f2 commit d0a28cf

File tree

85 files changed

+1162
-427
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+1162
-427
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ profile. This started with version 0.26.0.
88

99
### Changed
1010

11+
- \* Consistent formatting of comments (#2371, @Julow)
1112
- Documentation comments are now formatted by default (#2390, @Julow)
1213
Use the option `parse-docstrings = false` to disable.
1314
- \* Janestreet profile: do not break `fun _ -> function` (#2460, @tdelvecchio-jsc)

lib/Cmt.ml

Lines changed: 106 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -82,40 +82,118 @@ let pp_error fs {kind; cmt_kind} =
8282
formatting using the option --no-parse-docstrings.\n\
8383
%!" )
8484

85-
module T_no_loc = struct
86-
include T
87-
88-
let compare =
89-
Comparable.lexicographic [Comparable.lift String.compare ~f:txt]
90-
end
91-
92-
type loc = t
93-
94-
module Comparator_no_loc = struct
95-
type t = loc
96-
97-
include Comparator.Make (T_no_loc)
98-
end
99-
10085
type pos = Before | Within | After
10186

102-
let unindent_lines ~offset first_line tl_lines =
103-
(* The indentation of the first line must account for the location of the
104-
comment opening *)
105-
let fl_spaces =
106-
Option.value ~default:0 (String.indent_of_line first_line)
107-
in
108-
let fl_indent = fl_spaces + offset in
109-
let min_indent =
110-
List.fold_left ~init:fl_indent
87+
type decoded_kind =
88+
| Verbatim of string
89+
| Doc of string
90+
| Normal of string
91+
| Code of string
92+
| Asterisk_prefixed of string list
93+
94+
type decoded = {prefix: string; suffix: string; kind: decoded_kind}
95+
96+
(** [~content_offset] indicates at which column the body of the comment
97+
starts (1-indexed). [~max_idnent] indicates the maximum amount of
98+
indentation to trim. *)
99+
let unindent_lines ?(max_indent = Stdlib.max_int) ~content_offset first_line
100+
tl_lines =
101+
let tl_indent =
102+
List.fold_left ~init:max_indent
111103
~f:(fun acc s ->
112104
Option.value_map ~default:acc ~f:(min acc) (String.indent_of_line s) )
113105
tl_lines
114106
in
115-
(* Completely trim the first line *)
116-
String.drop_prefix first_line fl_spaces
107+
(* The indentation of the first line must account for the location of the
108+
comment opening. Don't account for the first line if it's empty.
109+
[fl_trim] is the number of characters to remove from the first line. *)
110+
let fl_trim, fl_indent =
111+
match String.indent_of_line first_line with
112+
| Some i ->
113+
(max 0 (min i (tl_indent - content_offset)), i + content_offset - 1)
114+
| None -> (String.length first_line, max_indent)
115+
in
116+
let min_indent = min tl_indent fl_indent in
117+
let first_line = String.drop_prefix first_line fl_trim in
118+
first_line
117119
:: List.map ~f:(fun s -> String.drop_prefix s min_indent) tl_lines
118120

119-
let unindent_lines ~offset = function
121+
let unindent_lines ?max_indent ~content_offset txt =
122+
match String.split ~on:'\n' txt with
120123
| [] -> []
121-
| hd :: tl -> unindent_lines ~offset hd tl
124+
| hd :: tl -> unindent_lines ?max_indent ~content_offset hd tl
125+
126+
let is_all_whitespace s = String.for_all s ~f:Char.is_whitespace
127+
128+
let split_asterisk_prefixed =
129+
let prefix = "*" in
130+
let drop_prefix s = String.drop_prefix s (String.length prefix) in
131+
let rec lines_are_asterisk_prefixed = function
132+
| [] -> true
133+
(* Allow the last line to be empty *)
134+
| [last] when is_all_whitespace last -> true
135+
| hd :: tl ->
136+
String.is_prefix hd ~prefix && lines_are_asterisk_prefixed tl
137+
in
138+
function
139+
(* Check whether the second line is not empty to avoid matching a comment
140+
with no asterisks. *)
141+
| fst_line :: (snd_line :: _ as tl)
142+
when lines_are_asterisk_prefixed tl && not (is_all_whitespace snd_line)
143+
->
144+
Some (fst_line :: List.map tl ~f:drop_prefix)
145+
| _ -> None
146+
147+
let mk ?(prefix = "") ?(suffix = "") kind = {prefix; suffix; kind}
148+
149+
let decode_comment ~parse_comments_as_doc txt loc =
150+
let txt =
151+
(* Windows compatibility *)
152+
let f = function '\r' -> false | _ -> true in
153+
String.filter txt ~f
154+
in
155+
let opn_offset =
156+
let {Lexing.pos_cnum; pos_bol; _} = loc.Location.loc_start in
157+
pos_cnum - pos_bol + 1
158+
in
159+
if String.length txt >= 2 then
160+
match txt.[0] with
161+
| '$' when not (Char.is_whitespace txt.[1]) -> mk (Verbatim txt)
162+
| '$' ->
163+
let dollar_suf = Char.equal txt.[String.length txt - 1] '$' in
164+
let suffix = if dollar_suf then "$" else "" in
165+
let code =
166+
let len = String.length txt - if dollar_suf then 2 else 1 in
167+
String.sub ~pos:1 ~len txt
168+
in
169+
mk ~prefix:"$" ~suffix (Code code)
170+
| '=' -> mk (Verbatim txt)
171+
| _ when is_all_whitespace txt ->
172+
mk (Verbatim " ") (* Make sure not to format to [(**)]. *)
173+
| _ when parse_comments_as_doc -> mk (Doc txt)
174+
| _ -> (
175+
let lines =
176+
let content_offset = opn_offset + 2 in
177+
unindent_lines ~content_offset txt
178+
in
179+
match split_asterisk_prefixed lines with
180+
| Some deprefixed_lines -> mk (Asterisk_prefixed deprefixed_lines)
181+
| None -> mk (Normal txt) )
182+
else
183+
match txt with
184+
(* "(**)" is not parsed as a docstring but as a regular comment
185+
containing '*' and would be rewritten as "(***)" *)
186+
| "*" when Location.width loc = 4 -> mk (Verbatim "")
187+
| ("*" | "$") as txt -> mk (Verbatim txt)
188+
| "\n" | " " -> mk (Verbatim " ")
189+
| _ -> mk (Normal txt)
190+
191+
let decode_docstring _loc = function
192+
| "" -> mk (Verbatim "")
193+
| ("*" | "$") as txt -> mk (Verbatim txt)
194+
| "\n" | " " -> mk (Verbatim " ")
195+
| txt -> mk ~prefix:"*" (Doc txt)
196+
197+
let decode ~parse_comments_as_doc = function
198+
| Comment {txt; loc} -> decode_comment ~parse_comments_as_doc txt loc
199+
| Docstring {txt; loc} -> decode_docstring loc txt

lib/Cmt.mli

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,19 @@ val pp_error : Format.formatter -> error -> unit
3333

3434
type pos = Before | Within | After
3535

36-
type loc = t
37-
38-
module Comparator_no_loc : sig
39-
type t = loc
40-
41-
include Comparator.S with type t := t
42-
end
43-
44-
val unindent_lines : offset:int -> string list -> string list
45-
(** Detect and remove the baseline indentation of a comment or a code block.
46-
[offset] is the column number at which the first line starts. *)
36+
type decoded_kind =
37+
| Verbatim of string (** Original content. *)
38+
| Doc of string (** Original content. *)
39+
| Normal of string
40+
(** Original content with indentation trimmed. Trailing spaces are not
41+
removed. *)
42+
| Code of string (** Source code with indentation removed. *)
43+
| Asterisk_prefixed of string list
44+
(** Line splitted with asterisks removed. *)
45+
46+
type decoded =
47+
{ prefix: string (** Just after the opening. *)
48+
; suffix: string (** Just before the closing. *)
49+
; kind: decoded_kind }
50+
51+
val decode : parse_comments_as_doc:bool -> t -> decoded

0 commit comments

Comments
 (0)