Skip to content

Commit d968d07

Browse files
committed
Add interface with docstrings for bibfmt
Signed-off-by: Marcello Seri <marcello.seri@gmail.com>
1 parent 5d5c859 commit d968d07

File tree

2 files changed

+156
-10
lines changed

2 files changed

+156
-10
lines changed

bibfmt/lib/bibtex.ml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,10 @@ let entry_type_of_string s =
7575
| _ -> Misc (* fallback to misc for unknown types *)
7676

7777
(* Simple Parser Combinator Library *)
78-
type 'a parser = string -> int -> ('a * int) option
78+
(* type 'a parser = string -> int -> ('a * int) option *)
7979

8080
let return x _input pos = Some (x, pos)
81-
let fail _input _pos = None
81+
(* let fail _input _pos = None *)
8282

8383
let bind p f input pos =
8484
match p input pos with Some (x, pos') -> f x input pos' | None -> None
@@ -148,9 +148,9 @@ let many_with_errors p input pos =
148148
in
149149
aux [] [] pos
150150

151-
let many1 p =
151+
(* let many1 p =
152152
p >>= fun x ->
153-
many p >>= fun xs -> return (x :: xs)
153+
many p >>= fun xs -> return (x :: xs) *)
154154

155155
let optional p input pos =
156156
match p input pos with
@@ -190,11 +190,11 @@ let ws p =
190190
p >>= fun x ->
191191
whitespace >>= fun _ -> return x
192192

193-
let string s input pos =
193+
(* let string s input pos =
194194
let len = String.length s in
195195
if pos + len <= String.length input && String.sub input pos len = s then
196196
Some (s, pos + len)
197-
else None
197+
else None *)
198198

199199
let take_while pred input pos =
200200
let start = pos in
@@ -366,22 +366,22 @@ let entry_comment_parser input pos =
366366
| None -> None
367367

368368
(* Parse either a field or a comment within an entry *)
369-
let entry_content_parser input pos =
369+
(* let entry_content_parser input pos =
370370
match entry_comment_parser input pos with
371371
| Some result -> Some result
372372
| None -> (
373373
match field_entry input pos with
374374
| Some (field, pos') -> Some (Field field, pos')
375-
| None -> None)
375+
| None -> None) *)
376376

377377
(* Whitespace parser that collects comments within entries *)
378-
let ws_with_comments p input pos =
378+
(* let ws_with_comments p input pos =
379379
let pos' = skip_whitespace input pos in
380380
match p input pos' with
381381
| Some (x, pos'') ->
382382
let pos''' = skip_whitespace input pos'' in
383383
Some (x, pos''')
384-
| None -> None
384+
| None -> None *)
385385

386386
(* Parse the contents of an entry - both fields and comments *)
387387
let entry_contents_parser input pos =

bibfmt/lib/bibtex.mli

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
(** BibTeX Parser and Pretty Printer
2+
3+
This module provides comprehensive functionality for parsing, manipulating,
4+
and formatting BibTeX bibliographic entries. It supports all standard BibTeX
5+
entry types and provides robust error handling for malformed input. *)
6+
7+
(** Type representing different ways field values can be formatted in BibTeX *)
8+
type field_value =
9+
| QuotedStringValue of string (** Value enclosed in double quotes *)
10+
| BracedStringValue of string (** Value enclosed in curly braces *)
11+
| UnquotedStringValue of string (** Raw unquoted value *)
12+
| NumberValue of int (** Numeric value *)
13+
14+
type field = { name : string; value : field_value }
15+
(** A BibTeX field with name and value *)
16+
17+
(** Standard BibTeX entry types *)
18+
type entry_type =
19+
| Article (** Journal article *)
20+
| Book (** Book with explicit publisher *)
21+
| Booklet
22+
(** Work that is printed and bound, but without a named publisher *)
23+
| Conference (** Article in conference proceedings (alias of [InProceedings]) *)
24+
| InBook (** Part of a book (chapter, section, etc.) *)
25+
| InCollection (** Part of a book having its own title *)
26+
| InProceedings (** Article in conference proceedings *)
27+
| Manual (** Technical documentation *)
28+
| MastersThesis (** Master's thesis *)
29+
| Misc (** Miscellaneous entry type *)
30+
| PhdThesis (** PhD thesis *)
31+
| Proceedings (** Conference proceedings *)
32+
| TechReport (** Technical report *)
33+
| Unpublished
34+
(** Document having an author and title, but not formally published *)
35+
36+
(** Content within a BibTeX entry *)
37+
type entry_content =
38+
| Field of field (** A field-value pair *)
39+
| EntryComment of string (** Comment within an entry *)
40+
41+
type bibtex_entry = {
42+
entry_type : entry_type; (** Type of the entry *)
43+
citekey : string; (** Citation key/identifier *)
44+
contents : entry_content list; (** List of fields and comments *)
45+
}
46+
(** Complete BibTeX entry *)
47+
48+
(** Top-level BibTeX item *)
49+
type bibtex_item =
50+
| Entry of bibtex_entry (** A bibliographic entry *)
51+
| Comment of string (** A comment line *)
52+
53+
type parse_error = { line : int; position : int; message : string }
54+
(** Parse error information *)
55+
56+
type parse_result = { items : bibtex_item list; errors : parse_error list }
57+
(** Result of parsing with potential errors *)
58+
59+
(** {2 Parsing Functions} *)
60+
61+
val parse_bibtex : string -> bibtex_item list
62+
(** [parse_bibtex input] parses a BibTeX string into a list of items. This
63+
function ignores parse errors and returns only successfully parsed items.
64+
@param input The BibTeX content as a string
65+
@return List of parsed BibTeX items *)
66+
67+
val parse_bibtex_with_errors : string -> parse_result
68+
(** [parse_bibtex_with_errors input] parses a BibTeX string and returns both
69+
successfully parsed items and any errors encountered.
70+
@param input The BibTeX content as a string
71+
@return Parse result containing items and errors *)
72+
73+
val has_parse_errors : parse_result -> bool
74+
(** [has_parse_errors result] checks if a parse result contains any errors.
75+
@param result The parse result to check
76+
@return true if there are errors, false otherwise *)
77+
78+
val get_parse_errors : parse_result -> parse_error list
79+
(** [get_parse_errors result] extracts the list of parse errors.
80+
@param result The parse result
81+
@return List of parse errors *)
82+
83+
val get_parsed_items : parse_result -> bibtex_item list
84+
(** [get_parsed_items result] extracts the list of successfully parsed items.
85+
@param result The parse result
86+
@return List of parsed BibTeX items *)
87+
88+
(** {2 Pretty Printers} *)
89+
90+
val pretty_print_bibtex : bibtex_item list -> string
91+
(** [pretty_print_bibtex items] formats a list of BibTeX items into a complete
92+
BibTeX string.
93+
@param items List of BibTeX items to format
94+
@return Complete formatted BibTeX string *)
95+
96+
val clean_bibtex : string -> string
97+
(** [clean_bibtex input] parses and reformats BibTeX input, effectively cleaning
98+
and normalizing the formatting.
99+
@param input The BibTeX content to clean
100+
@return Cleaned and reformatted BibTeX string *)
101+
102+
(** {2 Utility Functions for custom formatting or editing} *)
103+
104+
val string_of_entry_type : entry_type -> string
105+
(** [string_of_entry_type entry_type] converts an entry type to its string
106+
representation (e.g., Article becomes "article"). *)
107+
108+
val entry_type_of_string : string -> entry_type
109+
(** [entry_type_of_string str] converts a string to an entry type.
110+
@param str The string representation (case-insensitive)
111+
@return The corresponding entry type
112+
(falls back to [Misc] for unrecognized strings; never raises) *)
113+
114+
val format_field_value : field_value -> string
115+
(** [format_field_value value] formats a field value for output.
116+
@param value The field value to format
117+
@return String representation of the value *)
118+
119+
val format_field_value_with_url_unescaping : string -> field_value -> string
120+
(** [format_field_value_with_url_unescaping field_name value] formats a field
121+
value with URL unescaping and Unicode normalization applied. Special
122+
handling is applied to URL fields.
123+
@param field_name
124+
The name of the field (used to determine if URL processing is needed)
125+
@param value The field value to format
126+
@return String representation with URLs unescaped if applicable *)
127+
128+
val format_field : field -> string
129+
(** [format_field field] formats a complete field (name = value).
130+
@param field The field to format
131+
@return String representation of the field *)
132+
133+
val format_entry_content : entry_content -> string
134+
(** [format_entry_content content] formats entry content (field or comment).
135+
@param content The entry content to format
136+
@return String representation of the content *)
137+
138+
val format_entry : bibtex_entry -> string
139+
(** [format_entry entry] formats a complete BibTeX entry.
140+
@param entry The entry to format
141+
@return String representation of the entry *)
142+
143+
val format_bibtex_item : bibtex_item -> string
144+
(** [format_bibtex_item item] formats a BibTeX item (entry or comment).
145+
@param item The item to format
146+
@return String representation of the item *)

0 commit comments

Comments
 (0)