|
7 | 7 | * Now you can read a range of lines from a file |
8 | 8 | */ |
9 | 9 | import { createReadStream } from "fs" |
| 10 | +import { open } from "fs/promises" |
| 11 | +import * as iconv from "iconv-lite" |
| 12 | +import { detectEncoding } from "../../utils/encoding" |
10 | 13 |
|
11 | 14 | const outOfRangeError = (filepath: string, n: number) => { |
12 | 15 | return new RangeError(`Line with index ${n} does not exist in '${filepath}'. Note that line indexing is zero-based`) |
@@ -52,65 +55,88 @@ export function readLines(filepath: string, endLine?: number, startLine?: number |
52 | 55 | ) |
53 | 56 | } |
54 | 57 |
|
55 | | - // Set up stream |
56 | | - const input = createReadStream(filepath) |
57 | | - let buffer = "" |
58 | | - let lineCount = 0 |
59 | | - let result = "" |
60 | | - |
61 | | - // Handle errors |
62 | | - input.on("error", reject) |
63 | | - |
64 | | - // Process data chunks directly |
65 | | - input.on("data", (chunk) => { |
66 | | - // Add chunk to buffer |
67 | | - buffer += chunk.toString() |
68 | | - |
69 | | - let pos = 0 |
70 | | - let nextNewline = buffer.indexOf("\n", pos) |
71 | | - |
72 | | - // Process complete lines in the buffer |
73 | | - while (nextNewline !== -1) { |
74 | | - // If we're in the target range, add this line to the result |
75 | | - if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) { |
76 | | - result += buffer.substring(pos, nextNewline + 1) // Include the newline |
77 | | - } |
78 | | - |
79 | | - // Move position and increment line counter |
80 | | - pos = nextNewline + 1 |
81 | | - lineCount++ |
82 | | - |
83 | | - // If we've reached the end line, we can stop |
84 | | - if (endLine !== undefined && lineCount > endLine) { |
85 | | - input.destroy() |
86 | | - resolve(result) |
87 | | - return |
88 | | - } |
89 | | - |
90 | | - // Find next newline |
91 | | - nextNewline = buffer.indexOf("\n", pos) |
92 | | - } |
93 | | - |
94 | | - // Trim buffer - keep only the incomplete line |
95 | | - buffer = buffer.substring(pos) |
96 | | - }) |
97 | | - |
98 | | - // Handle end of file |
99 | | - input.on("end", () => { |
100 | | - // Process any remaining data in buffer (last line without newline) |
101 | | - if (buffer.length > 0) { |
102 | | - if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) { |
103 | | - result += buffer |
| 58 | + // Sample the first 64KB for encoding detection |
| 59 | + open(filepath, 'r') |
| 60 | + .then(fileHandle => { |
| 61 | + const sampleBuffer = Buffer.alloc(65536); |
| 62 | + return fileHandle.read(sampleBuffer, 0, sampleBuffer.length, 0) |
| 63 | + .then(() => sampleBuffer) |
| 64 | + .finally(() => fileHandle.close()); |
| 65 | + }) |
| 66 | + .then(sampleBuffer => detectEncoding(sampleBuffer)) |
| 67 | + .then(encoding => { |
| 68 | + // Node.js native supported encodings |
| 69 | + const nodeEncodings = ['utf8', 'ascii', 'latin1']; |
| 70 | + |
| 71 | + // Choose decoding method based on native support |
| 72 | + let input: NodeJS.ReadableStream; |
| 73 | + if (nodeEncodings.includes(encoding.toLowerCase())) { |
| 74 | + input = createReadStream(filepath, { encoding: encoding as BufferEncoding }); |
| 75 | + } else { |
| 76 | + input = createReadStream(filepath).pipe(iconv.decodeStream(encoding)); |
104 | 77 | } |
105 | | - lineCount++ |
106 | | - } |
107 | | - |
108 | | - // Check if we found any lines in the requested range |
109 | | - if (lineCount <= effectiveStartLine) { |
110 | | - reject(outOfRangeError(filepath, effectiveStartLine)) |
111 | | - } else { |
112 | | - resolve(result) |
113 | | - } |
114 | | - }) |
| 78 | + |
| 79 | + let buffer = "" |
| 80 | + let lineCount = 0 |
| 81 | + let result = "" |
| 82 | + |
| 83 | + // Handle errors |
| 84 | + input.on("error", reject) |
| 85 | + |
| 86 | + // Process data chunks directly |
| 87 | + input.on("data", (chunk) => { |
| 88 | + // Add chunk to buffer (chunk is already decoded using the detected encoding) |
| 89 | + buffer += chunk |
| 90 | + |
| 91 | + let pos = 0 |
| 92 | + let nextNewline = buffer.indexOf("\n", pos) |
| 93 | + |
| 94 | + // Process complete lines in the buffer |
| 95 | + while (nextNewline !== -1) { |
| 96 | + // If we're in the target range, add this line to the result |
| 97 | + if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) { |
| 98 | + result += buffer.substring(pos, nextNewline + 1) // Include the newline |
| 99 | + } |
| 100 | + |
| 101 | + // Move position and increment line counter |
| 102 | + pos = nextNewline + 1 |
| 103 | + lineCount++ |
| 104 | + |
| 105 | + // If we've reached the end line, we can stop |
| 106 | + if (endLine !== undefined && lineCount > endLine) { |
| 107 | + (input as any).destroy?.() |
| 108 | + resolve(result) |
| 109 | + return |
| 110 | + } |
| 111 | + |
| 112 | + // Find next newline |
| 113 | + nextNewline = buffer.indexOf("\n", pos) |
| 114 | + } |
| 115 | + |
| 116 | + // Trim buffer - keep only the incomplete line |
| 117 | + buffer = buffer.substring(pos) |
| 118 | + }) |
| 119 | + |
| 120 | + // Handle end of file |
| 121 | + input.on("end", () => { |
| 122 | + // Process any remaining data in buffer (last line without newline) |
| 123 | + if (buffer.length > 0) { |
| 124 | + if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) { |
| 125 | + result += buffer |
| 126 | + } |
| 127 | + lineCount++ |
| 128 | + } |
| 129 | + |
| 130 | + // Check if we found any lines in the requested range |
| 131 | + if (lineCount <= effectiveStartLine) { |
| 132 | + reject(outOfRangeError(filepath, effectiveStartLine)) |
| 133 | + } else { |
| 134 | + resolve(result) |
| 135 | + } |
| 136 | + }) |
| 137 | + }) |
| 138 | + .catch(error => { |
| 139 | + reject(error); |
| 140 | + }); |
115 | 141 | }) |
116 | 142 | } |
0 commit comments