Skip to content

Commit 137cc1a

Browse files
authored
Revert "Git - remove jschardet (microsoft#241256)" (microsoft#242469) (microsoft#242539)
This reverts commit 6fdb419.
1 parent 21d13f4 commit 137cc1a

File tree

7 files changed

+148
-23
lines changed

7 files changed

+148
-23
lines changed

extensions/git/package-lock.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

extensions/git/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
"statusBarItemTooltip",
3636
"tabInputMultiDiff",
3737
"tabInputTextMerge",
38-
"textDocumentEncoding",
3938
"textEditorDiffInformation",
4039
"timeline"
4140
],
@@ -3567,6 +3566,7 @@
35673566
"@vscode/iconv-lite-umd": "0.7.0",
35683567
"byline": "^5.0.0",
35693568
"file-type": "16.5.4",
3569+
"jschardet": "3.1.4",
35703570
"picomatch": "2.3.1",
35713571
"vscode-uri": "^2.0.0",
35723572
"which": "4.0.0"

extensions/git/src/commands.ts

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,29 +1628,19 @@ export class CommandCenter {
16281628
}
16291629

16301630
let modifiedUri = changes.modifiedUri;
1631-
let modifiedDocument: TextDocument | undefined;
1632-
16331631
if (!modifiedUri) {
16341632
const textEditor = window.activeTextEditor;
16351633
if (!textEditor) {
16361634
return;
16371635
}
1638-
1639-
modifiedDocument = textEditor.document;
1636+
const modifiedDocument = textEditor.document;
16401637
modifiedUri = modifiedDocument.uri;
16411638
}
1642-
16431639
if (modifiedUri.scheme !== 'file') {
16441640
return;
16451641
}
1646-
1647-
if (!modifiedDocument) {
1648-
modifiedDocument = await workspace.openTextDocument(modifiedUri);
1649-
}
1650-
16511642
const result = changes.originalWithModifiedChanges;
1652-
await this.runByRepository(modifiedUri, async (repository, resource) =>
1653-
await repository.stage(resource, result, modifiedDocument.encoding));
1643+
await this.runByRepository(modifiedUri, async (repository, resource) => await repository.stage(resource, result));
16541644
}
16551645

16561646
@command('git.stageSelectedRanges', { diff: true })
@@ -1827,8 +1817,7 @@ export class CommandCenter {
18271817
const originalDocument = await workspace.openTextDocument(originalUri);
18281818
const result = applyLineChanges(originalDocument, modifiedDocument, changes);
18291819

1830-
await this.runByRepository(modifiedUri, async (repository, resource) =>
1831-
await repository.stage(resource, result, modifiedDocument.encoding));
1820+
await this.runByRepository(modifiedUri, async (repository, resource) => await repository.stage(resource, result));
18321821
}
18331822

18341823
@command('git.revertChange')
@@ -2000,7 +1989,7 @@ export class CommandCenter {
20001989
this.logger.trace(`[CommandCenter][unstageSelectedRanges] invertedDiffs: ${JSON.stringify(invertedDiffs)}`);
20011990

20021991
const result = applyLineChanges(modifiedDocument, originalDocument, invertedDiffs);
2003-
await repository.stage(modifiedUri, result, modifiedDocument.encoding);
1992+
await repository.stage(modifiedUri, result);
20041993
}
20051994

20061995
@command('git.unstageFile')

extensions/git/src/encoding.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
import * as jschardet from 'jschardet';
7+
8+
/**
 * Identifies an encoding from a byte order mark at the very start of `buffer`.
 *
 * The two-byte UTF-16 marks are checked before the three-byte UTF-8 mark.
 *
 * @param buffer Raw bytes to inspect.
 * @returns `'utf16be'`, `'utf16le'` or `'utf8'` when the matching mark is
 * present; `null` when the buffer is missing, shorter than the mark, or starts
 * with anything else.
 */
function detectEncodingByBOM(buffer: Buffer): string | null {
	const byteCount = buffer?.length ?? 0;
	if (byteCount < 2) {
		return null;
	}

	// Read the first two bytes as one big-endian word so each UTF-16 mark is a single comparison.
	const leadingPair = buffer.readUInt16BE(0);

	if (leadingPair === 0xFEFF) {
		return 'utf16be';
	}

	if (leadingPair === 0xFFFE) {
		return 'utf16le';
	}

	// UTF-8 mark: EF BB BF
	const hasUtf8Mark = byteCount >= 3 && leadingPair === 0xEFBB && buffer.readUInt8(2) === 0xBF;

	return hasUtf8Mark ? 'utf8' : null;
}
39+
40+
/**
 * Detector results that `detectEncoding` discards. Entries are compared
 * against the lowercased name reported by jschardet.
 */
const IGNORE_ENCODINGS = ['ascii', 'utf-8', 'utf-16', 'utf-32'];

/**
 * Overrides applied to a detected name after it has been stripped of
 * non-alphanumeric characters and lowercased. Names without an entry are
 * returned by `detectEncoding` in their normalized form.
 */
const JSCHARDET_TO_ICONV_ENCODINGS: Record<string, string> = {
	ibm866: 'cp866',
	big5: 'cp950',
};

/**
 * Translates the candidate encoding names accepted by `detectEncoding` into
 * the names handed to jschardet's `detectEncodings` option.
 */
const MAP_CANDIDATE_GUESS_ENCODING_TO_JSCHARDET: Record<string, string> = {
	utf8: 'UTF-8',
	utf16le: 'UTF-16LE',
	utf16be: 'UTF-16BE',
	windows1252: 'windows-1252',
	windows1250: 'windows-1250',
	iso88592: 'ISO-8859-2',
	windows1251: 'windows-1251',
	cp866: 'IBM866',
	iso88595: 'ISO-8859-5',
	koi8r: 'KOI8-R',
	windows1253: 'windows-1253',
	iso88597: 'ISO-8859-7',
	windows1255: 'windows-1255',
	iso88598: 'ISO-8859-8',
	cp950: 'Big5',
	shiftjis: 'SHIFT_JIS',
	eucjp: 'EUC-JP',
	euckr: 'EUC-KR',
	gb2312: 'GB2312',
};
73+
74+
/**
 * Guesses the text encoding of `buffer`.
 *
 * A byte order mark, when present, decides the result. Otherwise jschardet is
 * consulted, optionally restricted to the given candidates.
 *
 * @param buffer Raw bytes to analyse.
 * @param candidateGuessEncodings Encoding names (keys of
 * `MAP_CANDIDATE_GUESS_ENCODING_TO_JSCHARDET`) the detector should be limited
 * to. Names without an entry in that table are dropped; when none remain the
 * detector runs unrestricted.
 * @returns The BOM encoding, or the detected name with non-alphanumeric
 * characters removed and lowercased (translated through
 * `JSCHARDET_TO_ICONV_ENCODINGS` when an entry exists). `null` when nothing
 * was detected or the detected name is listed in `IGNORE_ENCODINGS`.
 */
export function detectEncoding(buffer: Buffer, candidateGuessEncodings: string[]): string | null {
	const bomEncoding = detectEncodingByBOM(buffer);

	if (bomEncoding) {
		return bomEncoding;
	}

	// The lookup tables are plain objects, so indexing them with arbitrary names
	// (e.g. 'constructor', 'toString') would resolve to inherited Object.prototype
	// members. Only own keys are honoured.
	const hasOwn = Object.prototype.hasOwnProperty;

	const detectEncodings = candidateGuessEncodings
		.filter(name => hasOwn.call(MAP_CANDIDATE_GUESS_ENCODING_TO_JSCHARDET, name))
		.map(name => MAP_CANDIDATE_GUESS_ENCODING_TO_JSCHARDET[name]);

	const detected = jschardet.detect(buffer, detectEncodings.length > 0 ? { detectEncodings } : undefined);
	if (!detected || !detected.encoding) {
		return null;
	}

	const encoding = detected.encoding;

	// Ignore encodings that cannot guess correctly
	// (http://chardet.readthedocs.io/en/latest/supported-encodings.html)
	if (IGNORE_ENCODINGS.includes(encoding.toLowerCase())) {
		return null;
	}

	const normalizedEncodingName = encoding.replace(/[^a-zA-Z0-9]/g, '').toLowerCase();

	return hasOwn.call(JSCHARDET_TO_ICONV_ENCODINGS, normalizedEncodingName)
		? JSCHARDET_TO_ICONV_ENCODINGS[normalizedEncodingName]
		: normalizedEncodingName;
}

extensions/git/src/git.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import * as iconv from '@vscode/iconv-lite-umd';
1414
import * as filetype from 'file-type';
1515
import { assign, groupBy, IDisposable, toDisposable, dispose, mkdirp, readBytes, detectUnicodeEncoding, Encoding, onceEvent, splitInChunks, Limiter, Versions, isWindows, pathEquals, isMacintosh, isDescendant } from './util';
1616
import { CancellationError, CancellationToken, ConfigurationChangeEvent, LogOutputChannel, Progress, Uri, workspace } from 'vscode';
17+
import { detectEncoding } from './encoding';
1718
import { Ref, RefType, Branch, Remote, ForcePushMode, GitErrorCodes, LogOptions, Change, Status, CommitOptions, RefQuery, InitOptions } from './api/git';
1819
import * as byline from 'byline';
1920
import { StringDecoder } from 'string_decoder';
@@ -1329,6 +1330,18 @@ export class Repository {
13291330
.filter(entry => !!entry);
13301331
}
13311332

1333+
/**
 * Reads `object` through `buffer()` and decodes the bytes into a string.
 *
 * @param object Object specifier forwarded unchanged to `buffer()`.
 * @param encoding Encoding to decode with when guessing is disabled or yields nothing.
 * @param autoGuessEncoding When true, `detectEncoding` is run on the bytes and
 * its result, if any, takes precedence over `encoding`.
 * @param candidateGuessEncodings Candidate names forwarded to `detectEncoding`.
 * @returns The decoded text. An encoding that iconv does not know is replaced by `'utf8'`.
 */
async bufferString(object: string, encoding: string = 'utf8', autoGuessEncoding = false, candidateGuessEncodings: string[] = []): Promise<string> {
	const raw = await this.buffer(object);

	const guessed = autoGuessEncoding ? detectEncoding(raw, candidateGuessEncodings) : null;
	const preferred = guessed || encoding;

	return iconv.decode(raw, iconv.encodingExists(preferred) ? preferred : 'utf8');
}
1344+
13321345
async buffer(object: string): Promise<Buffer> {
13331346
const child = this.stream(['show', '--textconv', object]);
13341347

extensions/git/src/repository.ts

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import TelemetryReporter from '@vscode/extension-telemetry';
77
import * as fs from 'fs';
88
import * as path from 'path';
99
import picomatch from 'picomatch';
10+
import * as iconv from '@vscode/iconv-lite-umd';
1011
import { CancellationError, CancellationToken, CancellationTokenSource, Command, commands, Disposable, Event, EventEmitter, FileDecoration, l10n, LogLevel, LogOutputChannel, Memento, ProgressLocation, ProgressOptions, QuickDiffProvider, RelativePattern, scm, SourceControl, SourceControlInputBox, SourceControlInputBoxValidation, SourceControlInputBoxValidationType, SourceControlResourceDecorations, SourceControlResourceGroup, SourceControlResourceState, TabInputNotebookDiff, TabInputTextDiff, TabInputTextMultiDiff, ThemeColor, Uri, window, workspace, WorkspaceEdit } from 'vscode';
1112
import { ActionButton } from './actionButton';
1213
import { ApiRepository } from './api/api1';
@@ -24,6 +25,7 @@ import { StatusBarCommands } from './statusbar';
2425
import { toGitUri } from './uri';
2526
import { anyEvent, combinedDisposable, debounceEvent, dispose, EmptyDisposable, eventToPromise, filterEvent, find, getCommitShortHash, IDisposable, isDescendant, isLinuxSnap, isRemote, Limiter, onceEvent, pathEquals, relativePath } from './util';
2627
import { IFileWatcher, watch } from './watch';
28+
import { detectEncoding } from './encoding';
2729
import { ISourceControlHistoryItemDetailsProviderRegistry } from './historyItemDetailsProvider';
2830

2931
const timeout = (millis: number) => new Promise(c => setTimeout(c, millis));
@@ -1220,9 +1222,19 @@ export class Repository implements Disposable {
12201222
await this.run(Operation.Remove, () => this.repository.rm(resources.map(r => r.fsPath)));
12211223
}
12221224

1223-
async stage(resource: Uri, contents: string, encoding: string): Promise<void> {
1225+
async stage(resource: Uri, contents: string): Promise<void> {
1226+
const path = relativePath(this.repository.root, resource.fsPath).replace(/\\/g, '/');
12241227
await this.run(Operation.Stage, async () => {
1225-
const path = relativePath(this.repository.root, resource.fsPath).replace(/\\/g, '/');
1228+
const configFiles = workspace.getConfiguration('files', Uri.file(resource.fsPath));
1229+
let encoding = configFiles.get<string>('encoding') ?? 'utf8';
1230+
const autoGuessEncoding = configFiles.get<boolean>('autoGuessEncoding') === true;
1231+
const candidateGuessEncodings = configFiles.get<string[]>('candidateGuessEncodings') ?? [];
1232+
1233+
if (autoGuessEncoding) {
1234+
encoding = detectEncoding(Buffer.from(contents), candidateGuessEncodings) ?? encoding;
1235+
}
1236+
1237+
encoding = iconv.encodingExists(encoding) ? encoding : 'utf8';
12261238
await this.repository.stage(path, contents, encoding);
12271239

12281240
this._onDidChangeOriginalResource.fire(resource);
@@ -1968,15 +1980,17 @@ export class Repository implements Disposable {
19681980
async show(ref: string, filePath: string): Promise<string> {
19691981
return await this.run(Operation.Show, async () => {
19701982
const path = relativePath(this.repository.root, filePath).replace(/\\/g, '/');
1983+
const configFiles = workspace.getConfiguration('files', Uri.file(filePath));
1984+
const defaultEncoding = configFiles.get<string>('encoding');
1985+
const autoGuessEncoding = configFiles.get<boolean>('autoGuessEncoding');
1986+
const candidateGuessEncodings = configFiles.get<string[]>('candidateGuessEncodings');
19711987

19721988
try {
1973-
const content = await this.repository.buffer(`${ref}:${path}`);
1974-
return await workspace.decode(content, Uri.file(filePath));
1989+
return await this.repository.bufferString(`${ref}:${path}`, defaultEncoding, autoGuessEncoding, candidateGuessEncodings);
19751990
} catch (err) {
19761991
if (err.gitErrorCode === GitErrorCodes.WrongCase) {
19771992
const gitRelativePath = await this.repository.getGitRelativePath(ref, path);
1978-
const content = await this.repository.buffer(`${ref}:${gitRelativePath}`);
1979-
return await workspace.decode(content, Uri.file(filePath));
1993+
return await this.repository.bufferString(`${ref}:${gitRelativePath}`, defaultEncoding, autoGuessEncoding, candidateGuessEncodings);
19801994
}
19811995

19821996
throw err;

extensions/git/tsconfig.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
"../../src/vscode-dts/vscode.proposed.statusBarItemTooltip.d.ts",
2626
"../../src/vscode-dts/vscode.proposed.tabInputMultiDiff.d.ts",
2727
"../../src/vscode-dts/vscode.proposed.tabInputTextMerge.d.ts",
28-
"../../src/vscode-dts/vscode.proposed.textDocumentEncoding.d.ts",
2928
"../../src/vscode-dts/vscode.proposed.textEditorDiffInformation.d.ts",
3029
"../../src/vscode-dts/vscode.proposed.timeline.d.ts",
3130
"../types/lib.textEncoder.d.ts"

0 commit comments

Comments
 (0)