Skip to content

Commit b21e2f7

Browse files
authored
feat(encoding): add filepath logging and optimize encoding detection (#586)
1 parent 7f8f415 commit b21e2f7

File tree

3 files changed

+12
-7
lines changed

3 files changed

+12
-7
lines changed

src/integrations/misc/read-lines.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ export function readLines(filepath: string, endLine?: number, startLine?: number
6464
.then(() => sampleBuffer)
6565
.finally(() => fileHandle.close())
6666
})
67-
.then((sampleBuffer) => detectEncoding(sampleBuffer))
67+
.then((sampleBuffer) => detectEncoding(sampleBuffer, "", filepath))
6868
.then((encoding) => {
6969
// Node.js native supported encodings
7070
const nodeEncodings = ["utf8", "ascii", "latin1"]

src/services/code-index/processors/scanner.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ export class DirectoryScanner implements IDirectoryScanner {
139139
// Read file content with encoding detection
140140
const fileBuffer = await vscode.workspace.fs.readFile(vscode.Uri.file(filePath))
141141
const buffer = Buffer.from(fileBuffer)
142-
const encoding = await detectEncoding(buffer)
142+
const encoding = await detectEncoding(buffer, "", filePath)
143143
const content = iconv.decode(buffer, encoding)
144144

145145
// Calculate current hash

src/utils/encoding.ts

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ export const BINARY_MAGIC_NUMBERS = [
127127
* @param fileExtension Optional file extension
128128
* @returns The detected encoding
129129
*/
130-
export async function detectEncoding(fileBuffer: Buffer, fileExtension?: string): Promise<string> {
130+
export async function detectEncoding(fileBuffer: Buffer, fileExtension?: string, filePath?: string): Promise<string> {
131131
// 1. First check if it's a known binary file extension
132132
if (fileExtension && BINARY_EXTENSIONS.has(fileExtension)) {
133133
throw new Error(`Cannot read text for file type: ${fileExtension}`)
@@ -172,6 +172,8 @@ export async function detectEncoding(fileBuffer: Buffer, fileExtension?: string)
172172
encoding = "utf8"
173173
}
174174

175+
console.log(`${filePath} encoding with ${encoding}`)
176+
175177
return encoding
176178
}
177179

@@ -184,7 +186,7 @@ export async function readFileWithEncodingDetection(filePath: string): Promise<s
184186
const buffer = await fs.readFile(filePath)
185187
const fileExtension = path.extname(filePath).toLowerCase()
186188

187-
const encoding = await detectEncoding(buffer, fileExtension)
189+
const encoding = await detectEncoding(buffer, fileExtension, filePath)
188190
return iconv.decode(buffer, encoding)
189191
}
190192

@@ -197,7 +199,7 @@ export async function detectFileEncoding(filePath: string): Promise<string> {
197199
try {
198200
const buffer = await fs.readFile(filePath)
199201
const fileExtension = path.extname(filePath).toLowerCase()
200-
return await detectEncoding(buffer, fileExtension)
202+
return await detectEncoding(buffer, fileExtension, filePath)
201203
} catch (error) {
202204
// File does not exist or cannot be read, default to UTF-8
203205
return "utf8"
@@ -228,7 +230,7 @@ export async function isBinaryFileWithEncodingDetection(filePath: string): Promi
228230
}
229231
// Try to detect encoding first
230232
try {
231-
await detectEncoding(fileBuffer, fileExtension)
233+
await detectEncoding(fileBuffer, fileExtension, filePath)
232234
// If detectEncoding succeeds, it's a text file
233235
return false
234236
} catch (error) {
@@ -253,12 +255,15 @@ export async function writeFileWithEncodingPreservation(filePath: string, conten
253255
const originalEncoding = await detectFileEncoding(filePath)
254256

255257
// If original file is UTF-8 or ASCII (or does not exist / encoding is unknown), write directly as UTF-8
256-
if (originalEncoding === "utf8") {
258+
if (!originalEncoding || ["utf8", "ascii"].includes(originalEncoding.toLocaleLowerCase())) {
259+
console.log(`${filePath} encoding with utf8`)
260+
257261
await fs.writeFile(filePath, content, "utf8")
258262
return
259263
}
260264

261265
// Convert UTF-8 content to original file encoding
262266
const encodedBuffer = iconv.encode(content, originalEncoding)
267+
console.log(`${filePath} encoding with ${originalEncoding}`)
263268
await fs.writeFile(filePath, encodedBuffer)
264269
}

0 commit comments

Comments
 (0)