Skip to content

Commit dce80a3

Browse files
committed
Optimize PositionLastLines by reading in chunk instead of a single character at a time
1 parent f0148cd commit dce80a3

File tree

2 files changed

+184
-27
lines changed

2 files changed

+184
-27
lines changed

internal/capture/post.go

Lines changed: 97 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -42,38 +42,112 @@ func PositionLast5000Lines(file *os.File) (err error) {
4242
return PositionLastLines(file, 5000)
4343
}
4444

45+
// PositionLastLines moves the file offset so that the next read returns the last n lines.
46+
//
47+
// Lines are separated by '\n'. If the file does not end with '\n', the final partial
48+
// line counts as a line. If the file ends with '\n', that newline is treated as the
49+
// terminator of the last line (not as an extra empty line).
50+
//
51+
// If the file has fewer than n lines, the offset is set to the beginning of the file.
52+
// If n == 0, the offset is set to end-of-file.
53+
// The function returns an error if any I/O operation fails.
4554
func PositionLastLines(file *os.File, n uint) (err error) {
46-
var cursor int64 = 0
47-
stat, err := file.Stat()
48-
if err != nil {
49-
return
50-
}
51-
size := stat.Size()
52-
char := make([]byte, 1)
53-
lines := n
54-
for {
55-
cursor -= 1
56-
_, err = file.Seek(cursor, io.SeekEnd)
55+
// n == 0: "last 0 lines" -> position at EOF
56+
if n == 0 {
57+
_, err := file.Seek(0, io.SeekEnd)
5758
if err != nil {
58-
return
59+
return fmt.Errorf("seek to end: %w", err)
5960
}
60-
_, err = file.Read(char)
61+
return nil
62+
}
63+
64+
// Determine file size
65+
fileSize, err := file.Seek(0, io.SeekEnd)
66+
if err != nil {
67+
return fmt.Errorf("seek to end: %w", err)
68+
}
69+
if fileSize == 0 {
70+
// Empty file: position at start
71+
_, err := file.Seek(0, io.SeekStart)
6172
if err != nil {
62-
return
73+
return fmt.Errorf("seek to start: %w", err)
6374
}
64-
switch char[0] {
65-
case '\r':
66-
case '\n':
67-
lines--
75+
return nil
76+
}
77+
78+
// Check whether the file ends with a newline
79+
var lastByte [1]byte
80+
if _, err := file.ReadAt(lastByte[:], fileSize-1); err != nil {
81+
return fmt.Errorf("read last byte: %w", err)
82+
}
83+
endsWithNewline := lastByte[0] == '\n'
84+
85+
// To get the last n lines, we count newlines from the end:
86+
// - If file ends with '\n': we want to find the (n+1)th newline from EOF,
87+
// then start reading after it. The trailing '\n' is the line terminator
88+
// for the last line, not an extra empty line.
89+
// - If file doesn't end with '\n': we want the nth newline from EOF.
90+
//
91+
// Example: "a\nb\nc\n" with n=1 should give "c\n"
92+
// - We skip the trailing '\n' (line terminator)
93+
// - Find 1 more '\n' before it (the one after 'b')
94+
// - Start after that '\n' → "c\n"
95+
//
96+
// Example: "a\nb\nc" with n=1 should give "c"
97+
// - No trailing '\n'
98+
// - Find 1 '\n' from the end (the one after 'b')
99+
// - Start after that '\n' → "c"
100+
targetNewlines := int(n)
101+
if endsWithNewline {
102+
targetNewlines = int(n) + 1
103+
}
104+
105+
const bufferSize = 4096
106+
buf := make([]byte, bufferSize)
107+
108+
newlinesSeen := 0
109+
position := fileSize // position is the start offset of the next block to read
110+
111+
for position > 0 && newlinesSeen < targetNewlines {
112+
// Determine how much to read
113+
blockSize := bufferSize
114+
if int64(blockSize) > position {
115+
blockSize = int(position)
68116
}
69-
if lines == 0 {
70-
return
117+
118+
// Move to the previous block
119+
position -= int64(blockSize)
120+
121+
// Read block at fixed position without changing the current file offset
122+
nRead, err := file.ReadAt(buf[:blockSize], position)
123+
if err != nil && err != io.EOF {
124+
return fmt.Errorf("read chunk at %d: %w", position, err)
71125
}
72-
if cursor == -size {
73-
_, err = file.Seek(0, io.SeekStart)
74-
return
126+
chunk := buf[:nRead]
127+
128+
// Scan this chunk backwards for newline characters
129+
for i := len(chunk) - 1; i >= 0; i-- {
130+
if chunk[i] == '\n' {
131+
newlinesSeen++
132+
if newlinesSeen == targetNewlines {
133+
// Start just after this newline
134+
start := position + int64(i+1)
135+
_, err := file.Seek(start, io.SeekStart)
136+
if err != nil {
137+
return fmt.Errorf("seek to %d: %w", start, err)
138+
}
139+
return nil
140+
}
141+
}
75142
}
76143
}
144+
145+
// Not enough newline characters: the whole file is within the last n lines
146+
_, err = file.Seek(0, io.SeekStart)
147+
if err != nil {
148+
return fmt.Errorf("seek to start: %w", err)
149+
}
150+
return nil
77151
}
78152

79153
func PostCustomData(endpoint, params string, file *os.File) (msg string, ok bool) {

internal/capture/post_test.go

Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,14 @@ func TestPositionLastLines_EmptyFile(t *testing.T) {
114114
require.NoError(t, err, "failed to open empty test file")
115115
defer file.Close()
116116

117-
// Test with empty file - this should return an error due to the current implementation
118-
// The function tries to seek -1 from end on an empty file, which is invalid
117+
// Test with empty file - should handle gracefully (no error)
119118
err = PositionLastLines(file, 5)
120-
assert.Error(t, err, "PositionLastLines should return error for empty file due to current implementation")
119+
assert.NoError(t, err, "PositionLastLines should handle empty file gracefully")
120+
121+
// Verify file is positioned at end (nothing to read)
122+
bytes, err := io.ReadAll(file)
123+
require.NoError(t, err, "failed to read file content")
124+
assert.Empty(t, bytes, "empty file should remain empty")
121125
}
122126

123127
func TestPositionLastLines_EdgeCases(t *testing.T) {
@@ -131,7 +135,7 @@ func TestPositionLastLines_EdgeCases(t *testing.T) {
131135
name: "file with only newlines",
132136
content: "\n\n\n",
133137
lines: 2,
134-
expectedResult: "\n",
138+
expectedResult: "\n\n",
135139
},
136140
{
137141
name: "file without trailing newline",
@@ -145,6 +149,84 @@ func TestPositionLastLines_EdgeCases(t *testing.T) {
145149
lines: 0,
146150
expectedResult: "",
147151
},
152+
{
153+
name: "file with trailing newline - 1 line",
154+
content: "a\nb\nc\n",
155+
lines: 1,
156+
expectedResult: "c\n",
157+
},
158+
{
159+
name: "file with trailing newline - 2 lines",
160+
content: "a\nb\nc\n",
161+
lines: 2,
162+
expectedResult: "b\nc\n",
163+
},
164+
{
165+
name: "file with trailing newline - 3 lines",
166+
content: "a\nb\nc\n",
167+
lines: 3,
168+
expectedResult: "a\nb\nc\n",
169+
},
170+
{
171+
name: "file with trailing newline - more than available",
172+
content: "a\nb\nc\n",
173+
lines: 4,
174+
expectedResult: "a\nb\nc\n",
175+
},
176+
{
177+
name: "file without trailing newline - 1 line",
178+
content: "a\nb\nc",
179+
lines: 1,
180+
expectedResult: "c",
181+
},
182+
{
183+
name: "file without trailing newline - 2 lines",
184+
content: "a\nb\nc",
185+
lines: 2,
186+
expectedResult: "b\nc",
187+
},
188+
{
189+
name: "file without trailing newline - 3 lines",
190+
content: "a\nb\nc",
191+
lines: 3,
192+
expectedResult: "a\nb\nc",
193+
},
194+
{
195+
name: "single line with newline",
196+
content: "a\n",
197+
lines: 1,
198+
expectedResult: "a\n",
199+
},
200+
{
201+
name: "single line without newline",
202+
content: "a",
203+
lines: 1,
204+
expectedResult: "a",
205+
},
206+
{
207+
name: "single newline",
208+
content: "\n",
209+
lines: 1,
210+
expectedResult: "\n",
211+
},
212+
{
213+
name: "multiple trailing newlines",
214+
content: "a\n\n\n",
215+
lines: 1,
216+
expectedResult: "\n",
217+
},
218+
{
219+
name: "multiple trailing newlines - 2 lines",
220+
content: "a\n\n\n",
221+
lines: 2,
222+
expectedResult: "\n\n",
223+
},
224+
{
225+
name: "multiple trailing newlines - 3 lines",
226+
content: "a\n\n\n",
227+
lines: 3,
228+
expectedResult: "a\n\n\n",
229+
},
148230
}
149231

150232
for _, tt := range tests {
@@ -169,3 +251,4 @@ func TestPositionLastLines_EdgeCases(t *testing.T) {
169251
})
170252
}
171253
}
254+

0 commit comments

Comments
 (0)