Skip to content

Commit 05f58e3

Browse files
committed
wip: get initial version of h1 validation done
1 parent 1634c40 commit 05f58e3

File tree

2 files changed

+181
-17
lines changed

2 files changed

+181
-17
lines changed

cmd/readmevalidation/coderResources.go

Lines changed: 114 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package main
22

33
import (
4+
"bufio"
45
"errors"
56
"fmt"
67
"net/url"
8+
"regexp"
79
"strings"
810
)
911

@@ -94,25 +96,126 @@ func validateCoderResourceTags(tags []string) error {
9496
return nil
9597
}
9698

97-
func validateCoderResourceChanges(resource coderResourceReadme) []error {
99+
// Todo: This is a holdover from the validation logic used by the Coder Modules
100+
// repo. It gives us some assurance, but realistically, we probably want to
101+
// parse any Terraform code snippets, and make some deeper guarantees about how
102+
// it's structured. Just validating whether it *can* be parsed as Terraform
103+
// would be a big improvement.
104+
var terraformVersionRe = regexp.MustCompile("^\\bversion\\s+=")
105+
106+
// This validation function definitely has risks of false positives right now,
107+
// but realistically, it's not going to cause problems for launch. The most
108+
// foolproof way to rebuild this would be to parse each README into an AST, and
109+
// then parse each Terraform code block as Terraform
110+
func validateCoderResourceReadmeBody(body string) []error {
111+
trimmed := strings.TrimSpace(body)
112+
var errs []error
113+
errs = append(errs, validateReadmeBody(trimmed)...)
114+
115+
foundParagraph := false
116+
terraformCodeBlockCount := 0
117+
foundTerraformVersionRef := false
118+
119+
lineNum := 0
120+
isInsideCodeBlock := false
121+
isInsideTerraform := false
122+
123+
lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
124+
for lineScanner.Scan() {
125+
lineNum++
126+
nextLine := lineScanner.Text()
127+
128+
// Code assumes that invalid headers would've already been handled by
129+
// the base validation function, so we don't need to check deeper if the
130+
// first line isn't an h1
131+
if lineNum == 1 && !strings.HasPrefix(nextLine, "# ") {
132+
break
133+
}
134+
135+
if nextLine == "```" {
136+
if !isInsideCodeBlock {
137+
errs = append(errs, fmt.Errorf("line %d: found stray ``` (either an extra code block terminator, or a code block header without a specified language)", lineNum))
138+
break
139+
}
140+
141+
isInsideCodeBlock = false
142+
isInsideTerraform = false
143+
continue
144+
}
145+
146+
if isInsideTerraform {
147+
foundTerraformVersionRef = foundTerraformVersionRef || terraformVersionRe.MatchString(nextLine)
148+
continue
149+
}
150+
if isInsideCodeBlock {
151+
continue
152+
}
153+
154+
// Code assumes that we can treat this case as the end of the "h1
155+
// section" and don't need to process any further lines
156+
if strings.HasPrefix(nextLine, "#") {
157+
break
158+
}
159+
160+
// This is meant to catch cases like ```tf, ```hcl, and ```js
161+
if strings.HasPrefix(nextLine, "```") {
162+
isInsideCodeBlock = true
163+
isInsideTerraform = strings.HasPrefix(nextLine, "```tf")
164+
if isInsideTerraform {
165+
terraformCodeBlockCount++
166+
}
167+
168+
if strings.HasPrefix(nextLine, "```hcl") {
169+
errs = append(errs, fmt.Errorf("line %d: all .hcl language references must be converted to .tf", lineNum))
170+
}
171+
172+
continue
173+
}
174+
175+
// Code assumes that if we've reached this point, the only other options
176+
// are: (1) empty spaces, (2) paragraphs, (3) HTML, and (4) asset
177+
// references made via [] syntax
178+
trimmedLine := strings.TrimSpace(nextLine)
179+
isParagraph := trimmedLine != "" && !strings.HasPrefix(trimmedLine, "[") && !strings.HasPrefix(trimmedLine, "<")
180+
foundParagraph = foundParagraph || isParagraph
181+
}
182+
183+
if terraformCodeBlockCount == 0 {
184+
errs = append(errs, errors.New("did not find Terraform code block within h1 section"))
185+
} else {
186+
if terraformCodeBlockCount > 1 {
187+
errs = append(errs, errors.New("cannot have more than one Terraform code block in h1 section"))
188+
}
189+
if !foundTerraformVersionRef {
190+
errs = append(errs, errors.New("did not find Terraform code block that specifies 'version' field"))
191+
}
192+
}
193+
if !foundParagraph {
194+
errs = append(errs, errors.New("did not find paragraph within h1 section"))
195+
}
196+
197+
return errs
198+
}
199+
200+
func validateCoderResourceReadme(rm coderResourceReadme) []error {
98201
var errs []error
99202

100-
if err := validateReadmeBody(resource.body); err != nil {
101-
errs = append(errs, addFilePathToError(resource.filePath, err))
203+
for _, err := range validateCoderResourceReadmeBody(rm.body) {
204+
errs = append(errs, addFilePathToError(rm.filePath, err))
102205
}
103206

104-
if err := validateCoderResourceDisplayName(resource.frontmatter.DisplayName); err != nil {
105-
errs = append(errs, addFilePathToError(resource.filePath, err))
207+
if err := validateCoderResourceDisplayName(rm.frontmatter.DisplayName); err != nil {
208+
errs = append(errs, addFilePathToError(rm.filePath, err))
106209
}
107-
if err := validateCoderResourceDescription(resource.frontmatter.Description); err != nil {
108-
errs = append(errs, addFilePathToError(resource.filePath, err))
210+
if err := validateCoderResourceDescription(rm.frontmatter.Description); err != nil {
211+
errs = append(errs, addFilePathToError(rm.filePath, err))
109212
}
110-
if err := validateCoderResourceTags(resource.frontmatter.Tags); err != nil {
111-
errs = append(errs, addFilePathToError(resource.filePath, err))
213+
if err := validateCoderResourceTags(rm.frontmatter.Tags); err != nil {
214+
errs = append(errs, addFilePathToError(rm.filePath, err))
112215
}
113216

114-
for _, err := range validateCoderResourceIconURL(resource.frontmatter.IconURL) {
115-
errs = append(errs, addFilePathToError(resource.filePath, err))
217+
for _, err := range validateCoderResourceIconURL(rm.frontmatter.IconURL) {
218+
errs = append(errs, addFilePathToError(rm.filePath, err))
116219
}
117220

118221
return errs

cmd/readmevalidation/readmeFiles.go

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bufio"
55
"errors"
66
"fmt"
7+
"regexp"
78
"strings"
89
)
910

@@ -31,9 +32,8 @@ func separateFrontmatter(readmeText string) (string, string, error) {
3132
fm := ""
3233
body := ""
3334
fenceCount := 0
34-
lineScanner := bufio.NewScanner(
35-
strings.NewReader(strings.TrimSpace(readmeText)),
36-
)
35+
36+
lineScanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(readmeText)))
3737
for lineScanner.Scan() {
3838
nextLine := lineScanner.Text()
3939
if fenceCount < 2 && nextLine == fence {
@@ -66,12 +66,73 @@ func separateFrontmatter(readmeText string) (string, string, error) {
6666
return fm, strings.TrimSpace(body), nil
6767
}
6868

69-
func validateReadmeBody(body string) error {
69+
var readmeHeaderRe = regexp.MustCompile("^(#{1,})(\\s*)")
70+
71+
// Todo: This is a little chaotic, and might have some risks of false positives.
72+
// Might be better to bring in an
73+
func validateReadmeBody(body string) []error {
7074
trimmed := strings.TrimSpace(body)
75+
var errs []error
76+
77+
// If the very first line of the README, there's a risk that the rest of the
78+
// validation logic will break, since we don't have many guarantees about
79+
// how the README is actually structured
7180
if !strings.HasPrefix(trimmed, "# ") {
72-
return errors.New("README body must start with ATX-style h1 header (i.e., \"# \")")
81+
errs = append(errs, errors.New("README body must start with ATX-style h1 header (i.e., \"# \")"))
82+
return errs
83+
}
84+
85+
lineNum := 0
86+
lastHeaderLevel := 0
87+
foundFirstH1 := false
88+
89+
lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
90+
for lineScanner.Scan() {
91+
lineNum++
92+
nextLine := lineScanner.Text()
93+
94+
headerGroups := readmeHeaderRe.FindStringSubmatch(nextLine)
95+
if headerGroups == nil {
96+
continue
97+
}
98+
99+
spaceAfterHeader := headerGroups[2]
100+
if spaceAfterHeader == "" {
101+
errs = append(errs, fmt.Errorf("line %d: header does not have space between header characters and main header text", lineNum))
102+
}
103+
104+
nextHeaderLevel := len(headerGroups[1])
105+
if nextHeaderLevel == 1 && !foundFirstH1 {
106+
foundFirstH1 = true
107+
lastHeaderLevel = 1
108+
continue
109+
}
110+
111+
// If we have obviously invalid headers, it's not really safe to keep
112+
// proceeding with the rest of the content
113+
if nextHeaderLevel == 1 {
114+
errs = append(errs, errors.New("READMEs cannot contain more than h1 header"))
115+
break
116+
}
117+
if nextHeaderLevel > 6 {
118+
errs = append(errs, fmt.Errorf("line %d: README/HTML files cannot have headers exceed level 6 (found level %d)", lineNum, nextHeaderLevel))
119+
break
120+
}
121+
122+
// This is something we need to enforce for accessibility, not just for
123+
// the Registry website, but also when users are viewing the README
124+
// files in the GitHub web view
125+
if nextHeaderLevel > lastHeaderLevel && nextHeaderLevel != (lastHeaderLevel+1) {
126+
errs = append(errs, fmt.Errorf("line %d: headers are not allowed to increase more than 1 level at a time", lineNum))
127+
continue
128+
}
129+
130+
// As long as the above condition passes, there's no problems with
131+
// going up a header level or going down 1+ header levels
132+
lastHeaderLevel = nextHeaderLevel
73133
}
74-
return nil
134+
135+
return errs
75136
}
76137

77138
// validationPhase represents a specific phase during README validation. It is

0 commit comments

Comments
 (0)