-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathvalidation.go
More file actions
201 lines (166 loc) · 7.81 KB
/
validation.go
File metadata and controls
201 lines (166 loc) · 7.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
//go:build goexperiment.simd && amd64
package simdcsv
import "bytes"
// =============================================================================
// Validation Policy - Configurable behavior decisions
// =============================================================================
// validationPolicy bundles the Reader settings that steer field validation.
// It keeps the "what to validate" decisions (policy) apart from the
// "how to validate" code (mechanism).
type validationPolicy struct {
	trimLeadingSpace bool // mirrors Reader.TrimLeadingSpace
	comma            rune // mirrors Reader.Comma
}
// newValidationPolicy snapshots the Reader's TrimLeadingSpace and Comma
// settings into a validationPolicy value.
func (r *Reader) newValidationPolicy() validationPolicy {
	var policy validationPolicy
	policy.comma = r.Comma
	policy.trimLeadingSpace = r.TrimLeadingSpace
	return policy
}
// shouldUseMetadata reports whether the SIMD-parsed metadata of field may be
// trusted for validation. That is the case only when the field carries the
// fieldFlagIsQuoted flag and leading-space trimming is off; with
// TrimLeadingSpace enabled the metadata does not account for whitespace
// offset adjustments, so the answer is always false.
func (p validationPolicy) shouldUseMetadata(field fieldInfo) bool {
	if p.trimLeadingSpace {
		return false
	}
	return field.flags&fieldFlagIsQuoted != 0
}
// =============================================================================
// Field Extraction - Mechanism for accessing raw field data
// =============================================================================
// extractFieldBytes returns the slice of the raw buffer covering
// [rawStart, rawEnd). It returns (nil, false) when the span is empty,
// reversed, or reaches past the end of the buffer.
func (r *Reader) extractFieldBytes(rawStart, rawEnd uint64) ([]byte, bool) {
	// rawStart < rawEnd <= limit also guarantees rawStart < limit.
	if limit := uint64(len(r.state.rawBuffer)); rawStart < rawEnd && rawEnd <= limit {
		return r.state.rawBuffer[rawStart:rawEnd], true
	}
	return nil, false
}
// =============================================================================
// Field Quote Validation - Entry points
// =============================================================================
// validateFieldQuotesWithField checks quote usage for the field occupying
// [rawStart, rawEnd) of the raw buffer, preferring field metadata when the
// current policy allows it. A span that cannot be extracted is skipped and
// reported as valid (nil).
func (r *Reader) validateFieldQuotesWithField(field fieldInfo, rawStart, rawEnd uint64, lineNum int) error {
	if raw, ok := r.extractFieldBytes(rawStart, rawEnd); ok {
		return r.dispatchFieldValidation(raw, rawStart, field, lineNum, r.newValidationPolicy())
	}
	return nil
}
// dispatchFieldValidation picks the validation routine for a field:
//
//  1. metadata-based validation when the policy says the metadata is usable,
//  2. full-scan quoted validation when the bytes start with a quote
//     (optionally after leading space, per policy.trimLeadingSpace),
//  3. unquoted validation otherwise.
func (r *Reader) dispatchFieldValidation(raw []byte, rawStart uint64, field fieldInfo, lineNum int, policy validationPolicy) error {
	// Fast path: the SIMD scan already flagged this field as quoted.
	if policy.shouldUseMetadata(field) {
		return r.validateQuotedFieldFromMetadata(raw, rawStart, field, lineNum)
	}

	// Slow path: inspect the bytes; this also covers the TrimLeadingSpace case.
	quoted, skip := isQuotedFieldStart(raw, policy.trimLeadingSpace)
	if !quoted {
		return r.validateUnquotedField(raw, rawStart, lineNum)
	}

	// Re-anchor both the slice and its absolute position on the opening quote.
	return r.validateQuotedField(raw[skip:], rawStart+uint64(skip), lineNum) //nolint:gosec // G115
}
// =============================================================================
// Quoted Field Validation - Using SIMD metadata
// =============================================================================
// validateQuotedFieldFromMetadata validates a quoted field by trusting the
// structure recorded in field instead of re-scanning for quotes.
//
// raw holds the whole field including its quotes and rawStart is raw's
// absolute position. field.length is the content length between the quotes,
// so the closing quote is expected at index field.length + 1.
//
// The checks run in order and the first failure yields an ErrQuote
// ParseError: minimum length of two bytes, opening quote at index 0, closing
// quote at the expected index, and a valid byte (or nothing) after it.
func (r *Reader) validateQuotedFieldFromMetadata(raw []byte, rawStart uint64, field fieldInfo, lineNum int) error {
	closingIdx := int(field.length) + 1

	switch {
	case !hasMinimumLength(raw, 2):
		// Too short to hold both quotes.
		return r.quoteErrorAt(lineNum, rawStart, len(raw))
	case !hasOpeningQuote(raw):
		return r.quoteErrorAt(lineNum, rawStart, 1)
	case !hasClosingQuoteAt(raw, closingIdx):
		// Clamp so the reported offset never exceeds the field length.
		return r.quoteErrorAt(lineNum, rawStart, min(closingIdx+1, len(raw)))
	case !r.isValidAfterClosingQuote(raw, closingIdx):
		return r.quoteErrorAt(lineNum, rawStart, closingIdx+2)
	}
	return nil
}
// =============================================================================
// Quoted Field Validation - Full scan
// =============================================================================
// validateQuotedField validates a field by scanning it for its closing quote.
// raw must begin with the opening quote; rawStart is raw's absolute position.
// It returns an ErrQuote ParseError when no closing quote is found or when
// the byte after the closing quote is not a valid terminator.
func (r *Reader) validateQuotedField(raw []byte, rawStart uint64, lineNum int) error {
	// Start the search at index 1, just past the opening quote.
	closeAt := findClosingQuote(raw, 1)
	switch {
	case closeAt == -1:
		return r.quoteErrorAt(lineNum, rawStart, len(raw))
	case !r.isValidAfterClosingQuote(raw, closeAt):
		return r.quoteErrorAt(lineNum, rawStart, closeAt+2)
	}
	return nil
}
// =============================================================================
// Unquoted Field Validation
// =============================================================================
// validateUnquotedField validates a field that does not start with a quote.
// Any quote character inside such a field is reported as ErrBareQuote, with
// the column set to rawStart plus the quote's index plus one.
func (r *Reader) validateUnquotedField(raw []byte, rawStart uint64, lineNum int) error {
	if quotePos := bytes.IndexByte(raw, '"'); quotePos >= 0 {
		return &ParseError{
			StartLine: lineNum,
			Line:      lineNum,
			Column:    int(rawStart) + quotePos + 1, //nolint:gosec // G115
			Err:       ErrBareQuote,
		}
	}
	return nil
}
// =============================================================================
// Quote Structure Validators - Single responsibility functions
// =============================================================================
// hasMinimumLength reports whether data holds minLen bytes or more.
func hasMinimumLength(data []byte, minLen int) bool {
	if len(data) < minLen {
		return false
	}
	return true
}
// hasOpeningQuote reports whether data is non-empty and begins with a
// double-quote character.
func hasOpeningQuote(data []byte) bool {
	if len(data) == 0 {
		return false
	}
	return data[0] == '"'
}
// hasClosingQuoteAt reports whether data has a double-quote character at
// index closingIdx. Any index outside data, including a negative one, yields
// false instead of an out-of-range panic.
func hasClosingQuoteAt(data []byte, closingIdx int) bool {
	// Check both bounds: the index is derived from parsed metadata and must
	// never be allowed to crash the validator.
	if closingIdx < 0 || closingIdx >= len(data) {
		return false
	}
	return data[closingIdx] == '"'
}
// isValidAfterClosingQuote reports whether the byte following the closing
// quote at closingIdx is acceptable. Reaching the end of data right after the
// quote is valid; otherwise the next byte must be a field terminator for the
// Reader's configured Comma.
func (r *Reader) isValidAfterClosingQuote(data []byte, closingIdx int) bool {
	if next := closingIdx + 1; next < len(data) {
		return isFieldTerminator(data[next], r.Comma)
	}
	return true
}
// =============================================================================
// Field Terminator Detection - Mechanism
// =============================================================================
// isFieldTerminator reports whether b is a valid field terminator.
//
// Valid terminators are newline (\n), carriage return (\r), the literal
// comma (',', always accepted for backward compatibility with RFC 4180), and
// the configured comma when it fits in a single byte.
//
// A configured comma outside 0..0xFF can never equal a single byte, so it is
// never matched here. Narrowing such a rune with byte(comma) would keep only
// its low eight bits and wrongly accept an unrelated byte (for example '"'
// for U+0122).
func isFieldTerminator(b byte, comma rune) bool {
	// Largest rune value that survives conversion to byte unchanged.
	const maxByteRune = 0xFF

	switch b {
	case '\n', '\r':
		return true
	case ',':
		return true // Always accept comma for backward compatibility
	}

	// Only narrow the rune when the conversion is lossless.
	if comma < 0 || comma > maxByteRune {
		return false
	}
	return b == byte(comma)
}
// =============================================================================
// Error Helpers
// =============================================================================
// quoteErrorAt builds the ParseError used for quote-related validation
// failures. StartLine and Line are both set to lineNum, Err is ErrQuote, and
// Column is rawStart plus offset, where offset is the 0-indexed position
// within the field.
func (r *Reader) quoteErrorAt(lineNum int, rawStart uint64, offset int) *ParseError {
	perr := &ParseError{Err: ErrQuote}
	perr.StartLine, perr.Line = lineNum, lineNum
	perr.Column = int(rawStart) + offset //nolint:gosec // G115
	return perr
}