Skip to content

Commit fcf9d37

Browse files
authored
Comments support in unstable/Parser (#860)
1 parent 986afff commit fcf9d37

File tree

3 files changed

+267
-20
lines changed

3 files changed

+267
-20
lines changed

unstable/parser.go

Lines changed: 110 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,15 @@ func NewParserError(highlight []byte, format string, args ...interface{}) error
4949
// For performance reasons, go-toml doesn't make a copy of the input bytes to
5050
// the parser. Make sure to copy all the bytes you need to outlive the slice
5151
// given to the parser.
52-
//
53-
// The parser doesn't provide nodes for comments yet, nor for whitespace.
5452
type Parser struct {
5553
data []byte
5654
builder builder
5755
ref reference
5856
left []byte
5957
err error
6058
first bool
59+
60+
KeepComments bool
6161
}
6262

6363
// Data returns the slice provided to the last call to Reset.
@@ -142,6 +142,44 @@ func (p *Parser) Error() error {
142142
return p.err
143143
}
144144

// Position identifies a single location within the input.
type Position struct {
	// Offset counts the bytes between the beginning of the input and this
	// location.
	Offset int
	// Line is the line number of this location; the first line is 1.
	Line int
	// Column is the column number of this location; the first column is 1.
	Column int
}
154+
155+
// Shape describes the position of a range in the input.
156+
type Shape struct {
157+
Start Position
158+
End Position
159+
}
160+
161+
func (p *Parser) position(b []byte) Position {
162+
offset := danger.SubsliceOffset(p.data, b)
163+
164+
lead := p.data[:offset]
165+
166+
return Position{
167+
Offset: offset,
168+
Line: bytes.Count(lead, []byte{'\n'}) + 1,
169+
Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
170+
}
171+
}
172+
173+
// Shape returns the shape of the given range in the input. Will
174+
// panic if the range is not a subslice of the input.
175+
func (p *Parser) Shape(r Range) Shape {
176+
raw := p.Raw(r)
177+
return Shape{
178+
Start: p.position(raw),
179+
End: p.position(raw[r.Length:]),
180+
}
181+
}
182+
145183
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
146184
if b[0] == '\n' {
147185
return b[1:], nil
@@ -155,6 +193,19 @@ func (p *Parser) parseNewline(b []byte) ([]byte, error) {
155193
return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
156194
}
157195

196+
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
197+
ref := invalidReference
198+
data, rest, err := scanComment(b)
199+
if p.KeepComments && err == nil {
200+
ref = p.builder.Push(Node{
201+
Kind: Comment,
202+
Raw: p.Range(data),
203+
Data: data,
204+
})
205+
}
206+
return ref, rest, err
207+
}
208+
158209
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
159210
// expression = ws [ comment ]
160211
// expression =/ ws keyval ws [ comment ]
@@ -168,7 +219,7 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
168219
}
169220

170221
if b[0] == '#' {
171-
_, rest, err := scanComment(b)
222+
ref, rest, err := p.parseComment(b)
172223
return ref, rest, err
173224
}
174225

@@ -190,7 +241,10 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
190241
b = p.parseWhitespace(b)
191242

192243
if len(b) > 0 && b[0] == '#' {
193-
_, rest, err := scanComment(b)
244+
cref, rest, err := p.parseComment(b)
245+
if cref != invalidReference {
246+
p.builder.Chain(ref, cref)
247+
}
194248
return ref, rest, err
195249
}
196250

@@ -471,17 +525,33 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
471525
Kind: Array,
472526
})
473527

528+
// first indicates whether the parser is still looking for the first
529+
// (non-comment) element of the array.
474530
first := true
475531

476-
var lastChild reference
532+
lastChild := invalidReference
533+
534+
addChild := func(valueRef reference) {
535+
if lastChild == invalidReference {
536+
p.builder.AttachChild(parent, valueRef)
537+
} else {
538+
p.builder.Chain(lastChild, valueRef)
539+
}
540+
lastChild = valueRef
541+
}
477542

478543
var err error
479544
for len(b) > 0 {
480-
b, err = p.parseOptionalWhitespaceCommentNewline(b)
545+
cref := invalidReference
546+
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
481547
if err != nil {
482548
return parent, nil, err
483549
}
484550

551+
if cref != invalidReference {
552+
addChild(cref)
553+
}
554+
485555
if len(b) == 0 {
486556
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
487557
}
@@ -496,10 +566,13 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
496566
}
497567
b = b[1:]
498568

499-
b, err = p.parseOptionalWhitespaceCommentNewline(b)
569+
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
500570
if err != nil {
501571
return parent, nil, err
502572
}
573+
if cref != invalidReference {
574+
addChild(cref)
575+
}
503576
} else if !first {
504577
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
505578
}
@@ -515,17 +588,16 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
515588
return parent, nil, err
516589
}
517590

518-
if first {
519-
p.builder.AttachChild(parent, valueRef)
520-
} else {
521-
p.builder.Chain(lastChild, valueRef)
522-
}
523-
lastChild = valueRef
591+
addChild(valueRef)
524592

525-
b, err = p.parseOptionalWhitespaceCommentNewline(b)
593+
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
526594
if err != nil {
527595
return parent, nil, err
528596
}
597+
if cref != invalidReference {
598+
addChild(cref)
599+
}
600+
529601
first = false
530602
}
531603

@@ -534,15 +606,34 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
534606
return parent, rest, err
535607
}
536608

537-
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
609+
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
610+
rootCommentRef := invalidReference
611+
latestCommentRef := invalidReference
612+
613+
addComment := func(ref reference) {
614+
if rootCommentRef == invalidReference {
615+
rootCommentRef = ref
616+
} else if latestCommentRef == invalidReference {
617+
p.builder.AttachChild(rootCommentRef, ref)
618+
latestCommentRef = ref
619+
} else {
620+
p.builder.Chain(latestCommentRef, ref)
621+
latestCommentRef = ref
622+
}
623+
}
624+
538625
for len(b) > 0 {
539626
var err error
540627
b = p.parseWhitespace(b)
541628

542629
if len(b) > 0 && b[0] == '#' {
543-
_, b, err = scanComment(b)
630+
var ref reference
631+
ref, b, err = p.parseComment(b)
544632
if err != nil {
545-
return nil, err
633+
return invalidReference, nil, err
634+
}
635+
if ref != invalidReference {
636+
addComment(ref)
546637
}
547638
}
548639

@@ -553,14 +644,14 @@ func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
553644
if b[0] == '\n' || b[0] == '\r' {
554645
b, err = p.parseNewline(b)
555646
if err != nil {
556-
return nil, err
647+
return invalidReference, nil, err
557648
}
558649
} else {
559650
break
560651
}
561652
}
562653

563-
return b, nil
654+
return rootCommentRef, b, nil
564655
}
565656

566657
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {

unstable/parser_test.go

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,163 @@ func TestParser_AST_DateTimes(t *testing.T) {
448448
}
449449
}
450450

451+
// This example demonstrates how to parse a TOML document while preserving
452+
// comments. Comments are stored in the AST as Comment nodes. This example
453+
// displays the structure of the full AST generated by the parser using the
454+
// following structure:
455+
//
456+
// 1. Each root-level expression is separated by three dashes.
457+
// 2. Bytes associated to a node are displayed in square brackets.
458+
// 3. Siblings have the same indentation.
459+
// 4. Children of a node are indented one level.
460+
func ExampleParser_comments() {
461+
doc := `# Top of the document comment.
462+
# Optional, any amount of lines.
463+
464+
# Above table.
465+
[table] # Next to table.
466+
# Above simple value.
467+
key = "value" # Next to simple value.
468+
# Below simple value.
469+
470+
# Some comment alone.
471+
472+
# Multiple comments, on multiple lines.
473+
474+
# Above inline table.
475+
name = { first = "Tom", last = "Preston-Werner" } # Next to inline table.
476+
# Below inline table.
477+
478+
# Above array.
479+
array = [ 1, 2, 3 ] # Next to one-line array.
480+
# Below array.
481+
482+
# Above multi-line array.
483+
key5 = [ # Next to start of inline array.
484+
# Second line before array content.
485+
1, # Next to first element.
486+
# After first element.
487+
# Before second element.
488+
2,
489+
3, # Next to last element
490+
# After last element.
491+
] # Next to end of array.
492+
# Below multi-line array.
493+
494+
# Before array table.
495+
[[products]] # Next to array table.
496+
# After array table.
497+
`
498+
499+
var printGeneric func(*Parser, int, *Node)
500+
printGeneric = func(p *Parser, indent int, e *Node) {
501+
if e == nil {
502+
return
503+
}
504+
s := p.Shape(e.Raw)
505+
x := fmt.Sprintf("%d:%d->%d:%d (%d->%d)", s.Start.Line, s.Start.Column, s.End.Line, s.End.Column, s.Start.Offset, s.End.Offset)
506+
fmt.Printf("%-25s | %s%s [%s]\n", x, strings.Repeat(" ", indent), e.Kind, e.Data)
507+
printGeneric(p, indent+1, e.Child())
508+
printGeneric(p, indent, e.Next())
509+
}
510+
511+
printTree := func(p *Parser) {
512+
for p.NextExpression() {
513+
e := p.Expression()
514+
fmt.Println("---")
515+
printGeneric(p, 0, e)
516+
}
517+
if err := p.Error(); err != nil {
518+
panic(err)
519+
}
520+
}
521+
522+
p := &Parser{
523+
KeepComments: true,
524+
}
525+
p.Reset([]byte(doc))
526+
printTree(p)
527+
528+
// Output:
529+
// ---
530+
// 1:1->1:31 (0->30) | Comment [# Top of the document comment.]
531+
// ---
532+
// 2:1->2:33 (31->63) | Comment [# Optional, any amount of lines.]
533+
// ---
534+
// 4:1->4:15 (65->79) | Comment [# Above table.]
535+
// ---
536+
// 1:1->1:1 (0->0) | Table []
537+
// 5:2->5:7 (81->86) | Key [table]
538+
// 5:9->5:25 (88->104) | Comment [# Next to table.]
539+
// ---
540+
// 6:1->6:22 (105->126) | Comment [# Above simple value.]
541+
// ---
542+
// 1:1->1:1 (0->0) | KeyValue []
543+
// 7:7->7:14 (133->140) | String [value]
544+
// 7:1->7:4 (127->130) | Key [key]
545+
// 7:15->7:38 (141->164) | Comment [# Next to simple value.]
546+
// ---
547+
// 8:1->8:22 (165->186) | Comment [# Below simple value.]
548+
// ---
549+
// 10:1->10:22 (188->209) | Comment [# Some comment alone.]
550+
// ---
551+
// 12:1->12:40 (211->250) | Comment [# Multiple comments, on multiple lines.]
552+
// ---
553+
// 14:1->14:22 (252->273) | Comment [# Above inline table.]
554+
// ---
555+
// 1:1->1:1 (0->0) | KeyValue []
556+
// 15:8->15:9 (281->282) | InlineTable []
557+
// 1:1->1:1 (0->0) | KeyValue []
558+
// 15:18->15:23 (291->296) | String [Tom]
559+
// 15:10->15:15 (283->288) | Key [first]
560+
// 1:1->1:1 (0->0) | KeyValue []
561+
// 15:32->15:48 (305->321) | String [Preston-Werner]
562+
// 15:25->15:29 (298->302) | Key [last]
563+
// 15:1->15:5 (274->278) | Key [name]
564+
// 15:51->15:74 (324->347) | Comment [# Next to inline table.]
565+
// ---
566+
// 16:1->16:22 (348->369) | Comment [# Below inline table.]
567+
// ---
568+
// 18:1->18:15 (371->385) | Comment [# Above array.]
569+
// ---
570+
// 1:1->1:1 (0->0) | KeyValue []
571+
// 1:1->1:1 (0->0) | Array []
572+
// 1:1->1:1 (0->0) | Integer [1]
573+
// 1:1->1:1 (0->0) | Integer [2]
574+
// 1:1->1:1 (0->0) | Integer [3]
575+
// 19:1->19:6 (386->391) | Key [array]
576+
// 19:21->19:46 (406->431) | Comment [# Next to one-line array.]
577+
// ---
578+
// 20:1->20:15 (432->446) | Comment [# Below array.]
579+
// ---
580+
// 22:1->22:26 (448->473) | Comment [# Above multi-line array.]
581+
// ---
582+
// 1:1->1:1 (0->0) | KeyValue []
583+
// 1:1->1:1 (0->0) | Array []
584+
// 23:10->23:42 (483->515) | Comment [# Next to start of inline array.]
585+
// 24:3->24:38 (518->553) | Comment [# Second line before array content.]
586+
// 1:1->1:1 (0->0) | Integer [1]
587+
// 25:6->25:30 (559->583) | Comment [# Next to first element.]
588+
// 26:3->26:25 (586->608) | Comment [# After first element.]
589+
// 27:3->27:27 (611->635) | Comment [# Before second element.]
590+
// 1:1->1:1 (0->0) | Integer [2]
591+
// 1:1->1:1 (0->0) | Integer [3]
592+
// 29:6->29:28 (646->668) | Comment [# Next to last element]
593+
// 30:3->30:24 (671->692) | Comment [# After last element.]
594+
// 23:1->23:5 (474->478) | Key [key5]
595+
// 31:3->31:26 (695->718) | Comment [# Next to end of array.]
596+
// ---
597+
// 32:1->32:26 (719->744) | Comment [# Below multi-line array.]
598+
// ---
599+
// 34:1->34:22 (746->767) | Comment [# Before array table.]
600+
// ---
601+
// 1:1->1:1 (0->0) | ArrayTable []
602+
// 35:3->35:11 (770->778) | Key [products]
603+
// 35:14->35:36 (781->803) | Comment [# Next to array table.]
604+
// ---
605+
// 36:1->36:21 (804->824) | Comment [# After array table.]
606+
}
607+
451608
func ExampleParser() {
452609
doc := `
453610
hello = "world"

0 commit comments

Comments
 (0)