55 (for-label racket/base
66 racket/contract
77 racket/list
8+ racket/port
89 xml
910 xml/plist))
1011
1112@(define xml-eval (make-base-eval))
1213@(define plist-eval (make-base-eval))
1314@interaction-eval[#:eval xml-eval (require xml)]
1415@interaction-eval[#:eval xml-eval (require racket/list)]
16+ @interaction-eval[#:eval xml-eval (require racket/port)]
1517@interaction-eval[#:eval plist-eval (require xml/plist)]
18+ @(define reference '(lib "scribblings/reference/reference.scrbl " ))
1619
1720@title{XML: Parsing and Writing}
1821
@@ -30,15 +33,25 @@ Declaration (DTD) processing, including preservation of DTDs in read documents,
3033It also does not expand user-defined entities or read user-defined entities in attributes.
3134It does not interpret namespaces either.
3235
36+ @local-table-of-contents[]
37+
3338@; ----------------------------------------------------------------------
3439
3540@section{Datatypes}
3641
42+ @subsection{Structures}
43+
3744@defstruct[location ([line (or/c #f exact-nonnegative-integer?)]
3845 [char (or/c #f exact-nonnegative-integer?)]
3946 [offset exact-nonnegative-integer?])]{
4047
41- Represents a location in an input stream. The offset is a character offset unless @racket[xml-count-bytes] is @racket[#t ], in which case it is a byte offset.}
48+ Represents a location in an input stream. The offset is a
49+ character offset unless @racket[xml-count-bytes] is
50+ @racket[#t ], in which case it is a byte offset.
51+
52+ @history[
53+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
54+ ]}
4255
4356@defthing[location/c contract?]{
4457 Equivalent to @racket[(or/c location? symbol? #f )].
@@ -53,82 +66,200 @@ Represents a source location. Other structure types extend
5366When XML is generated from an input stream by @racket[read-xml],
5467locations are represented by @racket[location] instances. When XML
5568structures are generated by @racket[xexpr->xml], then locations are
56- symbols.}
69+ symbols.
70+
71+ @margin-note{Immediate instances of @racket[source] are not
72+ @tech[#:doc reference]{serializable}. The @racketmodname[xml] library
73+ only uses @tech[#:doc reference #:key "structure subtypes " ]{subtypes}
74+ of @racket[source].}
75+ }
5776
5877@deftogether[(
5978@defstruct[external-dtd ([system string?])]
6079@defstruct[(external-dtd/public external-dtd) ([public string?])]
6180@defstruct[(external-dtd/system external-dtd) ()]
81+ @defthing[no-external-dtd external-dtd? #:value (external-dtd "" )]
6282)]{
6383
64- Represents an externally defined DTD.}
84+ Represents an externally defined DTD.
85+
86+ As a special case, an immediate instance of @racket[external-dtd]
87+ represents the @emph{absence} of an external DTD, and its @racket[system]
88+ field is ignored. The @racket[no-external-dtd] value is provided
89+ for clarity, but any immediate instance of @racket[external-dtd]
90+ has the same meaning.
91+
92+ @examples[
93+ #:eval xml-eval
94+ (define (show-doctype name dtd)
95+ (write-xml (document (prolog '() (document-type name dtd #f ) '() )
96+ (element #f #f name '() '() )
97+ '() )))
98+ (show-doctype
99+ 'svg
100+ (external-dtd/public "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd "
101+ "-//W3C//DTD SVG 1.1//EN " ))
102+ (show-doctype 'greeting (external-dtd/system "hello.dtd " ))
103+ (show-doctype 'html (external-dtd "ignored " ))
104+ (show-doctype 'html no-external-dtd)
105+ ]
106+
107+ @history[
108+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
109+ #:changed "8.17.0.5 " @elem{Added @racket[no-external-dtd].}
110+ ]}
65111
66112@defstruct[document-type ([name symbol?]
67113 [external external-dtd?]
68114 [inlined #f ])]{
69115
70- Represents a document type.}
116+ Represents a document type. For examples, see @racket[external-dtd].
117+
118+ @history[
119+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
120+ ]}
71121
72122@defstruct[comment ([text string?])]{
73123
74- Represents a comment.}
124+ Represents a comment.
125+
126+ @history[
127+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
128+ ]}
75129
76130@defstruct[(p-i source) ([target-name symbol?]
77131 [instruction string?])]{
78132
79- Represents a processing instruction.}
133+ Represents a processing instruction.
134+
135+ @examples[
136+ #:eval xml-eval
137+ (write-xml (document (prolog (list (p-i #f #f 'xml "version=\"1.0\" " ))
138+ #f
139+ '() )
140+ (element #f #f 'x '() '() )
141+ '() ))
142+ ]
143+
144+ @history[
145+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
146+ ]}
80147
81148@defthing[misc/c contract?]{
82- Equivalent to @racket[(or/c comment? p-i?)]
149+ Equivalent to @racket[(or/c comment? p-i?)].
83150}
84151
85152@defstruct[prolog ([misc (listof misc/c)]
86153 [dtd (or/c document-type? #f )]
87154 [misc2 (listof misc/c)])]{
88155Represents a document prolog.
89- }
156+
157+ @history[
158+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
159+ ]}
90160
91161@defstruct[document ([prolog prolog?]
92162 [element element?]
93163 [misc (listof misc/c)])]{
94- Represents a document.}
164+ Represents a document.
165+
166+ @history[
167+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
168+ ]}
95169
96170@defstruct[(element source) ([name symbol?]
97171 [attributes (listof attribute?)]
98172 [content (listof content/c)])]{
99- Represents an element.}
173+ Represents an element.
174+
175+ @history[
176+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
177+ ]}
100178
101179@defstruct[(attribute source) ([name symbol?] [value (or/c string? permissive/c)])]{
102180
103- Represents an attribute within an element.}
181+ Represents an attribute within an element.
182+
183+ @history[
184+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
185+ ]}
104186
105187@defthing[content/c contract?]{
106188 Equivalent to @racket[(or/c pcdata? element? entity? comment? cdata? p-i? permissive/c)].
107189}
108190
109191@defthing[permissive/c contract?]{
110- If @racket[(permissive-xexprs)] is @racket[#t ], then equivalent to @racket[any/c], otherwise equivalent to @racket[(make-none/c 'permissive )]}
192+ If @racket[(permissive-xexprs)] is @racket[#t ], then equivalent to @racket[any/c],
193+ otherwise equivalent to @racket[(make-none/c 'permissive)].}
111194
112195@defproc[(valid-char? [x any/c]) boolean?]{
113196 Returns true if @racket[x] is an exact-nonnegative-integer whose character interpretation under UTF-8 is from the set ([#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]), in accordance with section 2.2 of the XML 1.1 spec.
114197}
115198
116199@defstruct[(entity source) ([text (or/c symbol? valid-char?)])]{
117200
118- Represents a symbolic or numerical entity.}
201+ Represents a symbolic @deftech{entity} reference
202+ or a numerical @deftech{character reference}.
203+
204+ As a special case, @racket[read-xml] parses references to the @deftech{predefined entities}
205+ into @racket[pcdata] values, so it does not generate @racket[entity] values containing
206+ @racket['lt ], @racket['gt ], @racket['amp ], @racket['apos ], or @racket['quot ].
207+ Nonetheless, such @racket[entity] values may be created programmatically.
208+
209+ @examples[
210+ #:eval xml-eval
211+ (for/list ([s '(lt gt amp apos quot)])
212+ (with-output-to-string
213+ (λ ()
214+ (write-xml/content (entity #f #f s)))))
215+ (read-xml/element
216+ (open-input-string "<x>&lt;&gt;&amp;&apos;&quot;</x>"))
217+ ]
218+
219+ @history[
220+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
221+ ]}
119222
120223@defstruct[(pcdata source) ([string string?])]{
121224
122- Represents PCDATA content.}
225+ Represents textual content, i.e@._ what the
226+ @link["https://www.w3.org/TR/REC-xml/#dt-chardata " ]{XML specification} calls
227+ @deftech{character data}.
228+
229+ More specifically, this library has several representations for
230+ @tech{character data} corresponding to different concrete syntaxes in XML.
231+ The @racket[pcdata] struct represents character data that is neither
232+ encoded by a @tech{character reference} or user-defined @tech{entity},
233+ which use @racket[entity], nor written in a @racket[cdata] section.
234+ References to the @tech{predefined entities} can be represented by either @racket[pcdata]
235+ or @racket[entity], but this library always uses @racket[pcdata] when parsing.
236+
237+ @margin-note{The @racket[pcdata] struct is a bit of a misnomer.
238+ In XML, @litchar{PCDATA} is a keyword used to declare that an element contains
239+ ``@link["https://www.w3.org/TR/REC-xml/#sec-mixed-content " ]{mixed content},''
240+ i.e@._ @tech{character data} potentially interspersed with child elements,
241+ but the @racket[pcdata] struct specifically represents @tech{character data}.
242+ Historically, the term meant ``parsed character data.''
243+ }
244+
245+ @history[
246+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
247+ ]}
123248
124249@defstruct[(cdata source) ([string string?])]{
125250
126- Represents CDATA content .
251+ Represents a CDATA section.
127252
128- The @racket[string ] field is assumed to be of the form
253+ The @racket[_string ] field is assumed to be of the form
129254@litchar{<![CDATA[}@nonterm{content}@litchar{]]>} with proper quoting
130255of @nonterm{content}. Otherwise, @racket[write-xml] generates
131- incorrect output.}
256+ ill-formed output.
257+
258+ @history[
259+ #:changed "8.17.0.5 " @elem{Added support for serialization with @racketmodname[racket/serialize].}
260+ ]}
261+
262+ @subsection{Exceptions}
132263
133264@defstruct[(exn:invalid-xexpr exn:fail) ([code any/c])]{
134265
@@ -140,6 +271,8 @@ of the input to @racket[validate-xexpr].}
140271 Raised by @racket[read-xml] when an error in the XML input is found.
141272}
142273
274+ @subsection{X-expressions}
275+
143276@defproc[(xexpr? [v any/c]) boolean?]{
144277
145278Returns @racket[#t ] if @racket[v] is an @tech{X-expression}, @racket[#f ] otherwise.
@@ -165,11 +298,14 @@ A pair represents an element, optionally with attributes. Each
165298attribute's name is represented by a symbol, and its value is
166299represented by a string.
167300
168- A @racket[_symbol] represents a symbolic entity. For example,
169- @racket['nbsp ] represents @litchar{ }.
301+ A @racket[_symbol] represents a symbolic @tech{entity} reference.
302+ For example, @racket['nbsp ] represents @litchar{&nbsp;}.
303+ @margin-note{Note that @racket[string->xexpr] and other parsing
304+ procedures represent references to the @tech{predefined entities}
305+ as strings instead of symbols.}
170306
171- A @racket[valid-char?] represents a numeric entity. For example,
172- @racketvalfont{#x20 } represents @litchar{ }.
307+ A @racket[valid-char?] represents a numerical @tech{character reference}.
308+ For example, @racketvalfont{#x20} represents @litchar{&#x20;}.
173309
174310A @racket[_cdata] is an instance of the @racket[cdata] structure type,
175311and a @racket[_misc] is an instance of the @racket[comment] or
@@ -181,7 +317,7 @@ and a @racket[_misc] is an instance of the @racket[comment] or
181317]}
182318
183319@defthing[xexpr/c contract?]{
184- A contract that is like @racket[xexpr?] except produces a better error
320+ A contract that is like @racket[xexpr?], but produces a better error
185321 message when the value is not an @tech{X-expression}.
186322
187323@history[
@@ -206,7 +342,7 @@ from @racketmodname[xml] with minimal dependencies.
206342@defproc[(read-xml [in input-port? (current-input-port)]) document?]{
207343
208344Reads in an XML document from the given or current input port. XML
209- documents contain exactly one element, raising @racket[xml-read:error ]
345+ documents contain exactly one element, raising @racket[exn:xml ]
210346if the input stream has zero elements or more than one element.
211347
212348Malformed XML is reported with source locations in the form
@@ -229,7 +365,9 @@ about creating ports that return non-character values.
229365
230366@defproc[(read-xml/document [in input-port? (current-input-port)]) document?]{
231367
232- Like @racket[read-xml], except that the reader stops after the single element, rather than attempting to read "miscellaneous " XML content after the element. The document returned by @racket[read-xml/document] always has an empty @racket[document-misc].}
368+ Like @racket[read-xml], except that the reader stops after the single element,
369+ rather than attempting to read ``miscellaneous'' XML content after the element.
370+ The document returned by @racket[read-xml/document] always has an empty @racket[document-misc].}
233371
234372@defproc[(read-xml/element [in input-port? (current-input-port)]) element?]{
235373
0 commit comments