Skip to content

Commit 225e49c

Browse files
authored
refactor: update comments and constants for SIMD processing (#24)
1 parent 3822fa5 commit 225e49c

File tree

9 files changed

+152
-149
lines changed

9 files changed

+152
-149
lines changed

errors.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ var (
1414
ErrFieldCount = errors.New("wrong number of fields")
1515
)
1616

17-
// ParseError represents a parsing error with location information
17+
// ParseError represents a parsing error with location information.
1818
type ParseError struct {
1919
StartLine int // Record start line
2020
Line int // Error line

field_parser.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,8 @@ func parseBuffer(buf []byte, sr *scanResult) *parseResult {
5959
}
6060

6161
// Initialize result with estimated capacities
62-
// Assume average field length of 10 bytes and row length of 50 bytes
63-
estimatedFields := len(buf) / 10
64-
estimatedRows := len(buf) / 50
62+
estimatedFields := len(buf) / avgFieldLenEstimate
63+
estimatedRows := len(buf) / avgRowLenEstimate
6564
result := newParseResult(estimatedFields, estimatedRows)
6665

6766
// Initialize state with lastSeparatorOrDelimiter = -1
@@ -74,7 +73,7 @@ func parseBuffer(buf []byte, sr *scanResult) *parseResult {
7473

7574
// Loop through all chunks, calling processChunkMasks for each
7675
for chunkIdx := 0; chunkIdx < sr.chunkCount; chunkIdx++ {
77-
offset := uint64(chunkIdx * 64)
76+
offset := uint64(chunkIdx * simdChunkSize)
7877
sepMask := sr.separatorMasks[chunkIdx]
7978
nlMask := sr.newlineMasks[chunkIdx]
8079

@@ -258,8 +257,8 @@ func finalizeLastField(buf []byte, state *parserState, result *parseResult, curr
258257
// unescapeDoubleQuotes collapses each escaped double-quote pair ("") into a single double-quote character (").
259258
// Dispatches to SIMD or scalar implementation based on CPU support and string size.
260259
func unescapeDoubleQuotes(s string) string {
261-
// Use SIMD for strings >= 32 bytes
262-
if useAVX512 && len(s) >= 32 {
260+
// Use SIMD for strings >= simdMinThreshold bytes
261+
if useAVX512 && len(s) >= simdMinThreshold {
263262
return unescapeDoubleQuotesSIMD(s)
264263
}
265264
return unescapeDoubleQuotesScalar(s)
@@ -379,7 +378,7 @@ func postProcessFields(buf []byte, result *parseResult, postProcChunks []int) {
379378

380379
// For each chunk that needs post-processing, find overlapping fields
381380
for _, chunkIdx := range postProcChunks {
382-
chunkStart := uint64(chunkIdx * 64)
381+
chunkStart := uint64(chunkIdx * simdChunkSize)
383382
chunkEnd := chunkStart + 64
384383

385384
// Search for fields that start within this chunk range

parse.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33
package simdcsv
44

55
// ParseBytes parses a byte slice directly (zero-copy).
6-
// This function runs Stage 1 and Stage 2 processing and returns all records.
6+
// This function runs scanBuffer and parseBuffer processing and returns all records.
77
func ParseBytes(data []byte, comma rune) ([][]string, error) {
88
if len(data) == 0 {
99
return nil, nil
1010
}
1111

12-
// Stage 1: Structural analysis using SIMD (generates bitmasks)
12+
// Scan: Structural analysis using SIMD (generates bitmasks)
1313
separatorChar := byte(comma)
1414
sr := scanBuffer(data, separatorChar)
1515

16-
// Stage 2: Extract fields and rows from scan result
16+
// Parse: Extract fields and rows from scan result
1717
pr := parseBuffer(data, sr)
1818

19-
// Stage 3: Convert parseResult to [][]string
19+
// Build: Convert parseResult to [][]string
2020
return buildRecords(data, pr), nil
2121
}
2222

@@ -28,18 +28,18 @@ func ParseBytesStreaming(data []byte, comma rune, callback func([]string) error)
2828
return nil
2929
}
3030

31-
// Stage 1: Structural analysis using SIMD (generates bitmasks)
31+
// Scan: Structural analysis using SIMD (generates bitmasks)
3232
separatorChar := byte(comma)
3333
sr := scanBuffer(data, separatorChar)
3434

35-
// Stage 2: Extract fields and rows from scan result
35+
// Parse: Extract fields and rows from scan result
3636
pr := parseBuffer(data, sr)
3737

3838
if pr == nil || len(pr.rows) == 0 {
3939
return nil
4040
}
4141

42-
// Stage 3: Invoke callback for each record
42+
// Build: Invoke callback for each record
4343
for _, row := range pr.rows {
4444
record := buildRecord(data, pr, row)
4545
if err := callback(record); err != nil {

quote.go

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ func isQuotedFieldStart(data []byte, trimLeadingSpace bool) (bool, int) {
4646
// Dispatches to SIMD or scalar implementation based on CPU support and data size.
4747
func findClosingQuote(data []byte, startAfterOpenQuote int) int {
4848
remaining := len(data) - startAfterOpenQuote
49-
// Use SIMD for data >= 32 bytes, otherwise scalar is faster
50-
if useAVX512 && remaining >= 32 {
49+
// Use SIMD for data >= simdMinThreshold bytes, otherwise scalar is faster
50+
if useAVX512 && remaining >= simdMinThreshold {
5151
return findClosingQuoteSIMD(data, startAfterOpenQuote)
5252
}
5353
return findClosingQuoteScalar(data, startAfterOpenQuote)
@@ -72,14 +72,14 @@ func findClosingQuoteScalar(data []byte, startAfterOpenQuote int) int {
7272
}
7373

7474
// findClosingQuoteSIMD uses SIMD to find the closing quote.
75-
// It searches for quote characters in 32-byte chunks using AVX-512.
75+
// It searches for quote characters in simdHalfChunk-byte chunks using AVX-512.
7676
func findClosingQuoteSIMD(data []byte, startAfterOpenQuote int) int {
7777
quoteCmp := archsimd.BroadcastInt8x32('"')
7878
i := startAfterOpenQuote
7979

80-
// Process 32-byte chunks
81-
for i+32 <= len(data) {
82-
chunk := archsimd.LoadInt8x32((*[32]int8)(unsafe.Pointer(&data[i])))
80+
// Process simdHalfChunk-byte chunks
81+
for i+simdHalfChunk <= len(data) {
82+
chunk := archsimd.LoadInt8x32((*[simdHalfChunk]int8)(unsafe.Pointer(&data[i])))
8383
mask := chunk.Equal(quoteCmp).ToBits()
8484

8585
if mask != 0 {
@@ -94,12 +94,15 @@ func findClosingQuoteSIMD(data []byte, startAfterOpenQuote int) int {
9494
// This is an escaped quote, skip both quotes
9595
// Clear this bit and the next (if in same chunk)
9696
mask &= ^(uint32(1) << pos)
97-
if pos+1 < 32 {
97+
if pos+1 < simdHalfChunk {
9898
mask &= ^(uint32(1) << (pos + 1))
9999
}
100-
// If next quote is in the next chunk, we need to skip it
101-
if pos == 31 {
102-
i += 32
100+
// If next quote is in the next chunk, we need to skip it.
101+
// Using goto here for performance: it allows us to skip the normal
102+
// i += simdHalfChunk increment and immediately continue with the
103+
// already-adjusted i value after handling boundary double quotes.
104+
if pos == simdHalfChunk-1 {
105+
i += simdHalfChunk
103106
// Skip the first quote of the next iteration
104107
if i < len(data) && data[i] == '"' {
105108
i++
@@ -112,7 +115,7 @@ func findClosingQuoteSIMD(data []byte, startAfterOpenQuote int) int {
112115
return absPos
113116
}
114117
}
115-
i += 32
118+
i += simdHalfChunk
116119
continueLoop:
117120
}
118121

@@ -127,6 +130,5 @@ func extractQuotedContent(data []byte, closingQuoteIdx int) string {
127130
if closingQuoteIdx <= 1 {
128131
return ""
129132
}
130-
content := string(data[1:closingQuoteIdx])
131-
return content
133+
return string(data[1:closingQuoteIdx])
132134
}

reader.go

Lines changed: 51 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,20 @@ type Reader struct {
5959
lastRecord []string
6060

6161
// SIMD processing state
62-
scanResult *scanResult // Scan result (structural character masks)
63-
parseResult *parseResult // Parse result (extracted fields/rows)
64-
currentRecordIndex int // Current record index in parseResult.rows
65-
initialized bool // Whether scan/parse have been run
62+
scanResult *scanResult // Scan result (structural character masks)
63+
parseResult *parseResult // Parse result (extracted fields/rows)
64+
currentRecordIndex int // Current record index in parseResult.rows
65+
nonCommentRecordCount int // Count of non-comment records returned (for O(1) first record detection)
66+
initialized bool // Whether scan/parse have been run
6667

6768
// Extended options (set via NewReaderWithOptions)
68-
skipBOM bool // Skip UTF-8 BOM if present
69-
bufferSize int // Buffer size hint (reserved for future use)
70-
chunkSize int // Chunk size hint (reserved for future use)
71-
zeroCopy bool // Zero-copy mode hint (reserved for future use)
69+
skipBOM bool // Skip UTF-8 BOM if present
70+
71+
// Reserved fields for future streaming/chunked processing implementation.
72+
// These fields are accepted by NewReaderWithOptions but currently have no effect.
73+
bufferSize int // Buffer size hint (not yet implemented)
74+
chunkSize int // Chunk size hint (not yet implemented)
75+
zeroCopy bool // Zero-copy mode hint (not yet implemented)
7276
}
7377

7478
// position represents a position in the input.
@@ -95,7 +99,7 @@ func NewReader(r io.Reader) *Reader {
9599
// If ReuseRecord is true, the returned slice may be shared
96100
// between multiple calls to Read.
97101
func (r *Reader) Read() (record []string, err error) {
98-
// Initialize on first call: read all input and run Stage 1 + Stage 2
102+
// Initialize on first call: read all input and run scanBuffer + parseBuffer
99103
if !r.initialized {
100104
if err := r.initialize(); err != nil {
101105
return nil, err
@@ -108,18 +112,19 @@ func (r *Reader) Read() (record []string, err error) {
108112
return nil, io.EOF
109113
}
110114

111-
// Get current row info
112-
rowInfo := r.parseResult.rows[r.currentRecordIndex]
115+
// Get current row info and index
116+
rowIdx := r.currentRecordIndex
117+
rowInfo := r.parseResult.rows[rowIdx]
113118
r.currentRecordIndex++
114119

115120
// Check for comment line (line starting with Comment character)
116-
if r.Comment != 0 && r.isCommentLine(rowInfo) {
121+
if r.Comment != 0 && r.isCommentLine(rowInfo, rowIdx) {
117122
// Skip this line and continue to next
118123
continue
119124
}
120125

121126
// Build record from fields with validation
122-
record, err = r.buildRecordWithValidation(rowInfo)
127+
record, err = r.buildRecordWithValidation(rowInfo, rowIdx)
123128
if err != nil {
124129
return record, err
125130
}
@@ -152,24 +157,19 @@ func (r *Reader) Read() (record []string, err error) {
152157
}
153158
// If FieldsPerRecord < 0, no check is performed
154159

160+
r.nonCommentRecordCount++
155161
return record, nil
156162
}
157163
}
158164

159-
// isFirstNonCommentRecord checks if this is the first non-comment record being returned
165+
// isFirstNonCommentRecord checks if this is the first non-comment record being returned.
166+
// Uses O(1) counter instead of O(n) re-scanning.
160167
func (r *Reader) isFirstNonCommentRecord() bool {
161-
// Count how many non-comment records we've processed
162-
nonCommentCount := 0
163-
for i := 0; i < r.currentRecordIndex; i++ {
164-
if i < len(r.parseResult.rows) && !r.isCommentLine(r.parseResult.rows[i]) {
165-
nonCommentCount++
166-
}
167-
}
168-
return nonCommentCount == 1
168+
return r.nonCommentRecordCount == 0
169169
}
170170

171171
// isCommentLine checks if a row is a comment line
172-
func (r *Reader) isCommentLine(row rowInfo) bool {
172+
func (r *Reader) isCommentLine(row rowInfo, rowIdx int) bool {
173173
if r.Comment == 0 || row.fieldCount == 0 {
174174
return false
175175
}
@@ -186,15 +186,16 @@ func (r *Reader) isCommentLine(row rowInfo) bool {
186186
return false
187187
}
188188
// Get the raw start position (the original field start in rawBuffer)
189-
rawStart := r.getRawFieldStart(row, firstFieldIdx)
189+
rawStart := r.getRawFieldStart(row, rowIdx, firstFieldIdx)
190190
if rawStart < uint64(len(r.rawBuffer)) {
191191
return r.rawBuffer[rawStart] == byte(r.Comment)
192192
}
193193
return false
194194
}
195195

196-
// getRawFieldStart gets the original field start position before quote adjustment
197-
func (r *Reader) getRawFieldStart(row rowInfo, fieldIdx int) uint64 {
196+
// getRawFieldStart gets the original field start position before quote adjustment.
197+
// Uses O(1) lookup with rowIdx instead of O(n) search.
198+
func (r *Reader) getRawFieldStart(row rowInfo, rowIdx, fieldIdx int) uint64 {
198199
// For the first field of a row, we need to find the actual start
199200
// which is either:
200201
// - 0 for the first row
@@ -208,15 +209,8 @@ func (r *Reader) getRawFieldStart(row rowInfo, fieldIdx int) uint64 {
208209
// If quoteAdjust was applied, start is field.start - 1
209210
// But for comment detection, we need the actual line start
210211
// We can find it by looking at the previous row's end position
211-
prevRowIdx := -1
212-
for i, r := range r.parseResult.rows {
213-
if r.firstField == row.firstField {
214-
prevRowIdx = i - 1
215-
break
216-
}
217-
}
218-
if prevRowIdx >= 0 {
219-
prevRow := r.parseResult.rows[prevRowIdx]
212+
if rowIdx > 0 {
213+
prevRow := r.parseResult.rows[rowIdx-1]
220214
lastFieldIdx := prevRow.firstField + prevRow.fieldCount - 1
221215
if lastFieldIdx >= 0 && lastFieldIdx < len(r.parseResult.fields) {
222216
lastField := r.parseResult.fields[lastFieldIdx]
@@ -227,7 +221,7 @@ func (r *Reader) getRawFieldStart(row rowInfo, fieldIdx int) uint64 {
227221
return field.start
228222
}
229223

230-
// initialize reads all input and runs Stage 1 and Stage 2 processing.
224+
// initialize reads all input and runs scanBuffer and parseBuffer processing.
231225
func (r *Reader) initialize() error {
232226
r.initialized = true
233227

@@ -269,7 +263,7 @@ func (r *Reader) initialize() error {
269263
}
270264

271265
// buildRecordWithValidation constructs a []string record from a rowInfo with quote validation
272-
func (r *Reader) buildRecordWithValidation(row rowInfo) ([]string, error) {
266+
func (r *Reader) buildRecordWithValidation(row rowInfo, rowIdx int) ([]string, error) {
273267
fieldCount := row.fieldCount
274268
record := r.allocateRecord(fieldCount)
275269

@@ -283,7 +277,7 @@ func (r *Reader) buildRecordWithValidation(row rowInfo) ([]string, error) {
283277
field := r.parseResult.fields[fieldIdx]
284278

285279
// Get raw field data for validation
286-
rawStart, rawEnd := r.getFieldRawBounds(row, fieldIdx, i)
280+
rawStart, rawEnd := r.getFieldRawBounds(row, rowIdx, fieldIdx, i)
287281

288282
// Validate quotes unless LazyQuotes is enabled
289283
if !r.LazyQuotes {
@@ -306,7 +300,7 @@ func (r *Reader) buildRecordWithValidation(row rowInfo) ([]string, error) {
306300
}
307301

308302
// getFieldRawBounds returns the raw start and end positions for a field in the buffer
309-
func (r *Reader) getFieldRawBounds(row rowInfo, fieldIdx, fieldNum int) (uint64, uint64) {
303+
func (r *Reader) getFieldRawBounds(row rowInfo, rowIdx, fieldIdx, fieldNum int) (uint64, uint64) {
310304
field := r.parseResult.fields[fieldIdx]
311305

312306
// Calculate raw start (before any quote adjustment)
@@ -317,7 +311,7 @@ func (r *Reader) getFieldRawBounds(row rowInfo, fieldIdx, fieldNum int) (uint64,
317311
rawStart = 0
318312
} else {
319313
// Find the position after the previous newline
320-
rawStart = r.findLineStart(row)
314+
rawStart = r.findLineStart(rowIdx)
321315
}
322316
} else {
323317
// For non-first fields, find the position after the previous separator
@@ -384,17 +378,9 @@ func (r *Reader) findRawFieldEnd(start uint64, isLastField bool) uint64 {
384378
return bufLen
385379
}
386380

387-
// findLineStart finds the start position of a line
388-
func (r *Reader) findLineStart(row rowInfo) uint64 {
389-
// Find the row index
390-
rowIdx := -1
391-
for i, ri := range r.parseResult.rows {
392-
if ri.firstField == row.firstField && ri.lineNum == row.lineNum {
393-
rowIdx = i
394-
break
395-
}
396-
}
397-
381+
// findLineStart finds the start position of a line.
382+
// Uses O(1) lookup with rowIdx instead of O(n) search.
383+
func (r *Reader) findLineStart(rowIdx int) uint64 {
398384
if rowIdx <= 0 {
399385
return 0
400386
}
@@ -517,10 +503,20 @@ func (r *Reader) InputOffset() int64 {
517503

518504
// ReaderOptions contains extended configuration options for [Reader].
519505
type ReaderOptions struct {
520-
BufferSize int // BufferSize specifies the internal buffer size in bytes. Default is 64KB.
521-
ChunkSize int // Parallel processing chunk size
522-
ZeroCopy bool // Zero-copy optimization (default: false)
523-
SkipBOM bool // Skip UTF-8 BOM (default: false)
506+
// SkipBOM skips UTF-8 BOM (EF BB BF) at the beginning of input if present.
507+
SkipBOM bool
508+
509+
// BufferSize specifies the internal buffer size hint in bytes.
510+
// NOTE: Not yet implemented; reserved for future streaming support.
511+
BufferSize int
512+
513+
// ChunkSize specifies the parallel processing chunk size.
514+
// NOTE: Not yet implemented; reserved for future streaming support.
515+
ChunkSize int
516+
517+
// ZeroCopy enables zero-copy optimization.
518+
// NOTE: Not yet implemented; reserved for future optimization.
519+
ZeroCopy bool
524520
}
525521

526522
// NewReaderWithOptions creates a Reader with extended options.

0 commit comments

Comments
 (0)