Skip to content

Commit 7402ad0

Browse files
committed
feat: Added file encoding detection and reading functions
1 parent 12f94fc commit 7402ad0

File tree

15 files changed

+237
-100
lines changed

15 files changed

+237
-100
lines changed

pnpm-lock.yaml

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/core/tools/applyDiffTool.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import path from "path"
22
import fs from "fs/promises"
33

44
import { TelemetryService } from "@roo-code/telemetry"
5+
import { readFileWithEncodingDetection } from "../../utils/encoding"
56
import { DEFAULT_WRITE_DELAY_MS } from "@roo-code/types"
67

78
import { ClineSayTool } from "../../shared/ExtensionMessage"
@@ -88,7 +89,7 @@ export async function applyDiffToolLegacy(
8889
return
8990
}
9091

91-
const originalContent: string = await fs.readFile(absolutePath, "utf-8")
92+
const originalContent: string = await readFileWithEncodingDetection(absolutePath)
9293

9394
// Apply the diff to the original content
9495
const diffResult = (await cline.diffStrategy?.applyDiff(

src/core/tools/insertContentTool.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import fs from "fs/promises"
33
import path from "path"
44

55
import { getReadablePath } from "../../utils/path"
6+
import { readFileWithEncodingDetection } from "../../utils/encoding"
67
import { Task } from "../task/Task"
78
import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag } from "../../shared/tools"
89
import { formatResponse } from "../prompts/responses"
@@ -92,7 +93,7 @@ export async function insertContentTool(
9293
return
9394
}
9495
} else {
95-
fileContent = await fs.readFile(absolutePath, "utf8")
96+
fileContent = await readFileWithEncodingDetection(absolutePath)
9697
}
9798

9899
cline.consecutiveMistakeCount = 0

src/core/tools/multiApplyDiffTool.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import path from "path"
22
import fs from "fs/promises"
33

44
import { TelemetryService } from "@roo-code/telemetry"
5+
import { readFileWithEncodingDetection } from "../../utils/encoding"
56
import { DEFAULT_WRITE_DELAY_MS } from "@roo-code/types"
67

78
import { ClineSayTool } from "../../shared/ExtensionMessage"
@@ -417,7 +418,7 @@ Original error: ${errorMessage}`
417418
const fileExists = opResult.fileExists!
418419

419420
try {
420-
let originalContent: string | null = await fs.readFile(absolutePath, "utf-8")
421+
let originalContent: string | null = await readFileWithEncodingDetection(absolutePath)
421422
let successCount = 0
422423
let formattedError = ""
423424

@@ -566,7 +567,7 @@ ${errorDetails ? `\nTechnical details:\n${errorDetails}\n` : ""}
566567
cline.diffViewProvider.scrollToFirstDiff()
567568
} else {
568569
// For direct save, we still need to set originalContent
569-
cline.diffViewProvider.originalContent = await fs.readFile(absolutePath, "utf-8")
570+
cline.diffViewProvider.originalContent = await readFileWithEncodingDetection(absolutePath)
570571
}
571572

572573
// Ask for approval (same for both flows)
@@ -601,7 +602,7 @@ ${errorDetails ? `\nTechnical details:\n${errorDetails}\n` : ""}
601602
if (isPreventFocusDisruptionEnabled) {
602603
// Direct file write without diff view or opening the file
603604
cline.diffViewProvider.editType = "modify"
604-
cline.diffViewProvider.originalContent = await fs.readFile(absolutePath, "utf-8")
605+
cline.diffViewProvider.originalContent = await readFileWithEncodingDetection(absolutePath)
605606
await cline.diffViewProvider.saveDirectly(
606607
relPath,
607608
originalContent!,

src/core/tools/searchAndReplaceTool.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import fs from "fs/promises"
44
import delay from "delay"
55

66
// Internal imports
7+
import { readFileWithEncodingDetection } from "../../utils/encoding"
78
import { Task } from "../task/Task"
89
import { AskApproval, HandleError, PushToolResult, RemoveClosingTag, ToolUse } from "../../shared/tools"
910
import { formatResponse } from "../prompts/responses"
@@ -148,7 +149,7 @@ export async function searchAndReplaceTool(
148149
// Read and process file content
149150
let fileContent: string
150151
try {
151-
fileContent = await fs.readFile(absolutePath, "utf-8")
152+
fileContent = await readFileWithEncodingDetection(absolutePath)
152153
} catch (error) {
153154
cline.consecutiveMistakeCount++
154155
cline.recordToolError("search_and_replace")

src/core/tools/writeToFileTool.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import * as vscode from "vscode"
44
import fs from "fs/promises"
55

66
import { Task } from "../task/Task"
7+
import { readFileWithEncodingDetection } from "../../utils/encoding"
78
import { ClineSayTool } from "../../shared/ExtensionMessage"
89
import { formatResponse } from "../prompts/responses"
910
import { ToolUse, AskApproval, HandleError, PushToolResult, RemoveClosingTag } from "../../shared/tools"
@@ -217,7 +218,7 @@ export async function writeToFileTool(
217218
cline.diffViewProvider.editType = fileExists ? "modify" : "create"
218219
if (fileExists) {
219220
const absolutePath = path.resolve(cline.cwd, relPath)
220-
cline.diffViewProvider.originalContent = await fs.readFile(absolutePath, "utf-8")
221+
cline.diffViewProvider.originalContent = await readFileWithEncodingDetection(absolutePath)
221222
} else {
222223
cline.diffViewProvider.originalContent = ""
223224
}

src/integrations/editor/DiffViewProvider.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { XMLBuilder } from "fast-xml-parser"
77
import delay from "delay"
88

99
import { createDirectoriesForFile } from "../../utils/fs"
10+
import { readFileWithEncodingDetection } from "../../utils/encoding"
1011
import { arePathsEqual, getReadablePath } from "../../utils/path"
1112
import { formatResponse } from "../../core/prompts/responses"
1213
import { diagnosticsToProblemsString, getNewDiagnostics } from "../diagnostics"
@@ -68,7 +69,7 @@ export class DiffViewProvider {
6869
this.preDiagnostics = vscode.languages.getDiagnostics()
6970

7071
if (fileExists) {
71-
this.originalContent = await fs.readFile(absolutePath, "utf-8")
72+
this.originalContent = await readFileWithEncodingDetection(absolutePath)
7273
} else {
7374
this.originalContent = ""
7475
}

src/integrations/editor/__tests__/DiffViewProvider.spec.ts

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,34 @@ vi.mock("delay", () => ({
99
}))
1010

1111
// Mock fs/promises
12-
vi.mock("fs/promises", () => ({
13-
readFile: vi.fn().mockResolvedValue("file content"),
14-
writeFile: vi.fn().mockResolvedValue(undefined),
15-
}))
12+
vi.mock("fs/promises", async () => {
13+
const actual = await vi.importActual("fs/promises");
14+
return {
15+
...actual,
16+
readFile: vi.fn().mockResolvedValue("file content"),
17+
writeFile: vi.fn().mockResolvedValue(undefined),
18+
default: {
19+
readFile: vi.fn().mockResolvedValue("file content"),
20+
writeFile: vi.fn().mockResolvedValue(undefined),
21+
}
22+
}
23+
})
1624

1725
// Mock utils
1826
vi.mock("../../../utils/fs", () => ({
1927
createDirectoriesForFile: vi.fn().mockResolvedValue([]),
2028
}))
2129

2230
// Mock path
23-
vi.mock("path", () => ({
24-
resolve: vi.fn((cwd, relPath) => `${cwd}/${relPath}`),
25-
basename: vi.fn((path) => path.split("/").pop()),
26-
}))
27-
31+
vi.mock("path", async () => {
32+
const actual = await vi.importActual("path");
33+
return {
34+
...actual,
35+
resolve: vi.fn((cwd, relPath) => `${cwd}/${relPath}`),
36+
basename: vi.fn((path) => path.split("/").pop()),
37+
};
38+
})
39+
// Mock vscode
2840
// Mock vscode
2941
vi.mock("vscode", () => ({
3042
workspace: {
@@ -90,7 +102,6 @@ vi.mock("vscode", () => ({
90102
parse: vi.fn((uri) => ({ with: vi.fn(() => ({})) })),
91103
},
92104
}))
93-
94105
// Mock DecorationController
95106
vi.mock("../DecorationController", () => ({
96107
DecorationController: vi.fn().mockImplementation(() => ({

src/integrations/misc/extract-text.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { isBinaryFile } from "isbinaryfile"
77
import { extractTextFromXLSX } from "./extract-text-from-xlsx"
88
import { countFileLines } from "./line-counter"
99
import { readLines } from "./read-lines"
10+
import { readFileWithEncodingDetection } from "../../utils/encoding"
1011

1112
async function extractTextFromPDF(filePath: string): Promise<string> {
1213
const dataBuffer = await fs.readFile(filePath)
@@ -20,7 +21,7 @@ async function extractTextFromDOCX(filePath: string): Promise<string> {
2021
}
2122

2223
async function extractTextFromIPYNB(filePath: string): Promise<string> {
23-
const data = await fs.readFile(filePath, "utf8")
24+
const data = await readFileWithEncodingDetection(filePath)
2425
const notebook = JSON.parse(data)
2526
let extractedText = ""
2627

@@ -103,7 +104,7 @@ export async function extractTextFromFile(filePath: string, maxReadFileLine?: nu
103104
}
104105
}
105106
// Read the entire file if no limit or file is within limit
106-
return addLineNumbers(await fs.readFile(filePath, "utf8"))
107+
return addLineNumbers(await readFileWithEncodingDetection(filePath))
107108
} else {
108109
throw new Error(`Cannot read text for file type: ${fileExtension}`)
109110
}

src/integrations/misc/read-lines.ts

Lines changed: 85 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
* Now you can read a range of lines from a file
88
*/
99
import { createReadStream } from "fs"
10+
import { open } from "fs/promises"
11+
import * as iconv from "iconv-lite"
12+
import { detectEncoding } from "../../utils/encoding"
1013

1114
const outOfRangeError = (filepath: string, n: number) => {
1215
return new RangeError(`Line with index ${n} does not exist in '${filepath}'. Note that line indexing is zero-based`)
@@ -52,65 +55,88 @@ export function readLines(filepath: string, endLine?: number, startLine?: number
5255
)
5356
}
5457

55-
// Set up stream
56-
const input = createReadStream(filepath)
57-
let buffer = ""
58-
let lineCount = 0
59-
let result = ""
60-
61-
// Handle errors
62-
input.on("error", reject)
63-
64-
// Process data chunks directly
65-
input.on("data", (chunk) => {
66-
// Add chunk to buffer
67-
buffer += chunk.toString()
68-
69-
let pos = 0
70-
let nextNewline = buffer.indexOf("\n", pos)
71-
72-
// Process complete lines in the buffer
73-
while (nextNewline !== -1) {
74-
// If we're in the target range, add this line to the result
75-
if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) {
76-
result += buffer.substring(pos, nextNewline + 1) // Include the newline
77-
}
78-
79-
// Move position and increment line counter
80-
pos = nextNewline + 1
81-
lineCount++
82-
83-
// If we've reached the end line, we can stop
84-
if (endLine !== undefined && lineCount > endLine) {
85-
input.destroy()
86-
resolve(result)
87-
return
88-
}
89-
90-
// Find next newline
91-
nextNewline = buffer.indexOf("\n", pos)
92-
}
93-
94-
// Trim buffer - keep only the incomplete line
95-
buffer = buffer.substring(pos)
96-
})
97-
98-
// Handle end of file
99-
input.on("end", () => {
100-
// Process any remaining data in buffer (last line without newline)
101-
if (buffer.length > 0) {
102-
if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) {
103-
result += buffer
58+
// Sample the first 64KB for encoding detection
59+
open(filepath, 'r')
60+
.then(fileHandle => {
61+
const sampleBuffer = Buffer.alloc(65536);
62+
return fileHandle.read(sampleBuffer, 0, sampleBuffer.length, 0)
63+
.then(() => sampleBuffer)
64+
.finally(() => fileHandle.close());
65+
})
66+
.then(sampleBuffer => detectEncoding(sampleBuffer))
67+
.then(encoding => {
68+
// Node.js native supported encodings
69+
const nodeEncodings = ['utf8', 'ascii', 'latin1'];
70+
71+
// Choose decoding method based on native support
72+
let input: NodeJS.ReadableStream;
73+
if (nodeEncodings.includes(encoding.toLowerCase())) {
74+
input = createReadStream(filepath, { encoding: encoding as BufferEncoding });
75+
} else {
76+
input = createReadStream(filepath).pipe(iconv.decodeStream(encoding));
10477
}
105-
lineCount++
106-
}
107-
108-
// Check if we found any lines in the requested range
109-
if (lineCount <= effectiveStartLine) {
110-
reject(outOfRangeError(filepath, effectiveStartLine))
111-
} else {
112-
resolve(result)
113-
}
114-
})
78+
79+
let buffer = ""
80+
let lineCount = 0
81+
let result = ""
82+
83+
// Handle errors
84+
input.on("error", reject)
85+
86+
// Process data chunks directly
87+
input.on("data", (chunk) => {
88+
// Add chunk to buffer (chunk is already decoded using the detected encoding)
89+
buffer += chunk
90+
91+
let pos = 0
92+
let nextNewline = buffer.indexOf("\n", pos)
93+
94+
// Process complete lines in the buffer
95+
while (nextNewline !== -1) {
96+
// If we're in the target range, add this line to the result
97+
if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) {
98+
result += buffer.substring(pos, nextNewline + 1) // Include the newline
99+
}
100+
101+
// Move position and increment line counter
102+
pos = nextNewline + 1
103+
lineCount++
104+
105+
// If we've reached the end line, we can stop
106+
if (endLine !== undefined && lineCount > endLine) {
107+
(input as any).destroy?.()
108+
resolve(result)
109+
return
110+
}
111+
112+
// Find next newline
113+
nextNewline = buffer.indexOf("\n", pos)
114+
}
115+
116+
// Trim buffer - keep only the incomplete line
117+
buffer = buffer.substring(pos)
118+
})
119+
120+
// Handle end of file
121+
input.on("end", () => {
122+
// Process any remaining data in buffer (last line without newline)
123+
if (buffer.length > 0) {
124+
if (lineCount >= effectiveStartLine && (endLine === undefined || lineCount <= endLine)) {
125+
result += buffer
126+
}
127+
lineCount++
128+
}
129+
130+
// Check if we found any lines in the requested range
131+
if (lineCount <= effectiveStartLine) {
132+
reject(outOfRangeError(filepath, effectiveStartLine))
133+
} else {
134+
resolve(result)
135+
}
136+
})
137+
})
138+
.catch(error => {
139+
reject(error);
140+
});
115141
})
116142
}

0 commit comments

Comments
 (0)