Skip to content

Commit 0bb3ae7

Browse files
authored
Replace utf8 package with using TextDecoder API (#466)
This fixes the assumption that every GDB on every host supports, and starts up with, a UTF-8 host charset, which caused decoding errors whenever that was not the case. * Replace use of the utf8 package with the TextDecoder API. * Detect the host charset encoding and apply it to octal char sequences; update it on each charset change. * Synchronize changed charsets between MAIN and AUX GDB backends. * Add a fixup map for encodings not known to the WHATWG standard, e.g. Windows CP65001, which is effectively UTF-8. * Explicitly set the charset to UTF-8 for the Windows unicode path test, because a machine/GDB combination may use CP1252 when the setting is 'auto'. --------- Signed-off-by: Jens Reinecke <[email protected]>
1 parent ac70e85 commit 0bb3ae7

File tree

8 files changed

+249
-39
lines changed

8 files changed

+249
-39
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Change Log
22

3+
## Unreleased
4+
5+
- Fixes [`465`](https://github.com/eclipse-cdt-cloud/cdt-gdb-adapter/issues/465): UTF-8 'Failed to decode cstring' errors for GDB with CP1252 support only.
6+
37
## 1.4.1
48

59
- Fixes [`400`](https://github.com/eclipse-cdt-cloud/cdt-gdb-adapter/issues/400): Evaluation of variables to support RTOS Views extension.

package.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@
6464
"@vscode/debugadapter": "^1.68.0",
6565
"@vscode/debugprotocol": "^1.68.0",
6666
"node-addon-api": "^8.4.0",
67-
"serialport": "^13.0.0",
68-
"utf8": "^3.0.0"
67+
"serialport": "^13.0.0"
6968
},
7069
"devDependencies": {
7170
"@istanbuljs/nyc-config-typescript": "^1.0.2",
@@ -76,7 +75,6 @@
7675
"@types/serialport": "^10.2.0",
7776
"@types/sinon": "^17.0.4",
7877
"@types/tmp": "^0.2.6",
79-
"@types/utf8": "^3.0.3",
8078
"@typescript-eslint/eslint-plugin": "^8.35.1",
8179
"@typescript-eslint/parser": "^8.35.1",
8280
"@vscode/debugadapter-testsupport": "^1.68.0",

src/MIParser.ts

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
*********************************************************************/
1010
import { Readable } from 'stream';
1111
import { IGDBBackend } from './types/gdb';
12-
import * as utf8 from 'utf8';
1312
import { NamedLogger } from './namedLogger';
1413

1514
interface Command {
@@ -21,8 +20,22 @@ type CommandQueue = {
2120
[key: string]: Command;
2221
};
2322

23+
const DEFAULT_CHARSET = 'utf-8';
24+
/**
25+
* Fixups for non-WHATWG charset encodings in GDB that are known to effectively be
26+
* aliases for whatwg encodings.
27+
*/
28+
const FIXUPS_CHARSET = new Map<string, string>([['cp65001', 'utf-8']]);
29+
2430
export class MIParser {
25-
protected logger;
31+
/**
32+
* Encoding for GDB/MI cstring characters outside standard ASCII range.
33+
* Value has to match names known to TextDecoder API, see also
34+
* whatwg encodings (https://encoding.spec.whatwg.org/).
35+
* Defaults to 'utf-8' to match previous behavior.
36+
*/
37+
protected _hostCharset = DEFAULT_CHARSET;
38+
protected logger: NamedLogger;
2639
protected line = '';
2740
protected pos = 0;
2841

@@ -36,6 +49,19 @@ export class MIParser {
3649
this.logger = new NamedLogger(name);
3750
}
3851

52+
/**
 * Sets the encoding used to decode octal-escaped bytes in GDB/MI cstrings.
 *
 * The name is trimmed and lower-cased before it is looked up in
 * FIXUPS_CHARSET, so a differently cased alias (e.g. 'CP65001') is still
 * mapped. `undefined` and empty/blank names select DEFAULT_CHARSET.
 *
 * @param value Charset name as reported by GDB, or undefined if unknown.
 */
public set hostCharset(value: string | undefined) {
    const requested = value?.trim().toLowerCase();
    // A blank name is not a usable TextDecoder label; use the default.
    const charset = requested ? requested : DEFAULT_CHARSET;
    this._hostCharset = FIXUPS_CHARSET.get(charset) ?? charset;
    this.logger.verbose(
        `MIParser: Decoding GDB host character set as '${this._hostCharset}'`
    );
}

/**
 * Encoding currently used to decode octal-escaped cstring bytes, after
 * normalization and fixups applied by the setter.
 */
public get hostCharset(): string | undefined {
    return this._hostCharset;
}
64+
3965
public cancelQueue() {
4066
// Entries in the form of [key, callback]
4167
const entries = Object.entries(this.commandQueue);
@@ -120,6 +146,21 @@ export class MIParser {
120146
return token;
121147
}
122148

149+
/**
 * Decodes the values collected from consecutive octal escape sequences of
 * a GDB/MI cstring using the current host charset.
 *
 * @param encodedBytes Values of the octal escapes, in order of appearance.
 * @returns The decoded text. If decoding throws (for example because the
 *     charset label is rejected by TextDecoder or the bytes are malformed
 *     for it), the error is logged and every value is mapped to the code
 *     point of the same number instead.
 */
protected decodeCStringBytes(encodedBytes: number[]): string {
    const buffer = Buffer.from(encodedBytes);
    try {
        // fatal: throw on malformed input instead of silently emitting U+FFFD.
        return new TextDecoder(this._hostCharset, { fatal: true }).decode(
            buffer
        );
    } catch (err) {
        // JSON.stringify() of an Error yields '{}'; log its message instead.
        const reason = err instanceof Error ? err.message : String(err);
        this.logger.error(
            `Failed to decode code points (${this._hostCharset}) '${encodedBytes}'. ${reason}`
        );
    }
    // Return something even if garbage. Convert value by value: spreading a
    // long array into a single call can exceed the engine's argument limit.
    return encodedBytes
        .map((value) => String.fromCodePoint(value))
        .join('');
}
163+
123164
protected handleCString() {
124165
let c = this.next();
125166
if (!c || c !== '"') {
@@ -128,21 +169,33 @@ export class MIParser {
128169

129170
let cstring = '';
130171
let octal = '';
172+
let encodedBytes = [];
131173
mainloop: for (c = this.next(); c; c = this.next()) {
132174
if (octal) {
133175
octal += c;
134176
if (octal.length == 3) {
135-
cstring += String.fromCodePoint(parseInt(octal, 8));
177+
// Octal sequence complete, save code point
178+
encodedBytes.push(parseInt(octal, 8));
136179
octal = '';
137180
}
138181
continue;
139182
}
183+
// End of octal sequence, convert accumulated code points
184+
if (c !== '\\' && encodedBytes.length) {
185+
cstring += this.decodeCStringBytes(encodedBytes);
186+
encodedBytes = [];
187+
}
140188
switch (c) {
141189
case '"':
142190
break mainloop;
143191
case '\\':
144192
c = this.next();
145193
if (c) {
194+
// End of octal sequence, convert and add accumulated code points
195+
if (encodedBytes.length && (c < '0' || c > '7')) {
196+
cstring += this.decodeCStringBytes(encodedBytes);
197+
encodedBytes = [];
198+
}
146199
switch (c) {
147200
case 'n':
148201
cstring += '\n';
@@ -173,15 +226,12 @@ export class MIParser {
173226
cstring += c;
174227
}
175228
}
176-
177-
try {
178-
return utf8.decode(cstring);
179-
} catch (err) {
180-
this.logger.error(
181-
`Failed to decode cstring '${cstring}'. ${JSON.stringify(err)}`
182-
);
183-
return cstring;
229+
// Remaining code points, convert and add accumulated code points
230+
if (encodedBytes.length) {
231+
cstring += this.decodeCStringBytes(encodedBytes);
232+
encodedBytes = [];
184233
}
234+
return cstring;
185235
}
186236

187237
protected handleString() {

src/constants/gdb.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,15 @@ export const RESUME_COMMANDS = [
4848
'until',
4949
'u',
5050
];
51+
52+
/**
 * Patterns for commands that change GDB's host character set, either
 * directly ('host-charset') or through 'charset'. Covers the MI form, the
 * console command wrapped in -interpreter-exec, and the plain CLI form.
 * The plain form is anchored on a word boundary so that words merely
 * ending in "set" (e.g. "offset charset ...") are not taken for a command.
 */
export const SET_HOSTCHARSET_REGEXPS = [
    /-gdb-set\s+(host-charset|charset)\s+.*/,
    /-interpreter-exec\s+console\s+"set\s+(host-charset|charset)\s+.*/,
    /\bset\s+(host-charset|charset)\s+.*/,
];
57+
58+
/**
 * Patterns for commands that change any of GDB's character set settings
 * (charset, host-charset, target-charset, target-wide-charset). Covers the
 * MI form, the console command wrapped in -interpreter-exec, and the plain
 * CLI form. The plain form is anchored on a word boundary so that words
 * merely ending in "set" are not taken for a command.
 */
export const SET_ALL_CHARSET_REGEXPS = [
    /-gdb-set\s+(charset|host-charset|target-charset|target-wide-charset)\s+.*/,
    /-interpreter-exec\s+console\s+"set\s+(charset|host-charset|target-charset|target-wide-charset)\s+.*/,
    /\bset\s+(charset|host-charset|target-charset|target-wide-charset)\s+.*/,
];

src/gdb/GDBBackend.ts

Lines changed: 107 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,18 @@ import {
2222
sendExecInterrupt,
2323
} from '../mi';
2424
import { VarManager } from '../varManager';
25+
import { SET_HOSTCHARSET_REGEXPS } from '../constants/gdb';
2526
import { IGDBBackend, IGDBProcessManager, IStdioProcess } from '../types/gdb';
2627
import { MIParser } from '../MIParser';
2728
import { compareVersions } from '../util/compareVersions';
2829
import { isProcessActive } from '../util/processes';
2930
import { NamedLogger } from '../namedLogger';
3031

32+
// Expected console output for interpreter command 'show host-charset'
// if setting is 'auto'. The capture group is the charset name between
// 'currently ' and the closing double quote; it stops at the first quote
// so trailing text on the same line cannot leak into the name.
const HOST_CHARSET_REGEXP =
    /The host character set is "auto; currently ([^"]+)"/;
36+
3137
type WriteCallback = (error: Error | null | undefined) => void;
3238

3339
export class GDBBackend extends events.EventEmitter implements IGDBBackend {
@@ -57,6 +63,90 @@ export class GDBBackend extends events.EventEmitter implements IGDBBackend {
5763
return this.varMgr;
5864
}
5965

66+
/**
 * Sends the interpreter command to show the host charset and resolves when
 * the console stream (category 'stdout') contains the expected line, when
 * the command fails, or when a 500 ms timeout expires. Does not reject:
 * resolves to undefined on error or timeout.
 *
 * @returns Lower-cased charset name taken from the console output, or
 *     undefined if it could not be determined.
 */
private async getAutoHostCharsetFromConsole(): Promise<string | undefined> {
    // Plain (non-async) executor: an async executor would hide any
    // unexpected exception in a promise nobody observes.
    return new Promise<string | undefined>((resolve) => {
        let resolved = false;
        let charset: string | undefined;

        // Call when the promise is ready to resolve: removes the listener,
        // clears the timeout and resolves, all exactly once.
        const done = () => {
            if (!resolved) {
                resolved = true;
                this.off('consoleStreamOutput', logListener);
                clearTimeout(timeout);
                resolve(charset);
            }
        };

        // Temporary listener looking out for console output
        const logListener = (output: string, category: string) => {
            if (category !== 'stdout') {
                return; // Expected output only on stdout
            }
            const match = HOST_CHARSET_REGEXP.exec(output);
            if (!match) {
                return; // No match, continue waiting
            }
            // Match, call it done even if no valid charset.
            // Use lower case encoding name as used for TextDecoder.
            charset = match[1]?.toLowerCase();
            done();
        };

        this.on('consoleStreamOutput', logListener);
        // Timeout to avoid lockup if something's wrong or stdout is missing.
        const timeout = setTimeout(() => {
            this.logger.error(
                'Error detecting host character set from stdout: timeout'
            );
            done();
        }, 500);

        // On success the listener or the timeout settles the promise;
        // only a failed command has to be handled here.
        this.sendCommand(
            '-interpreter-exec console "show host-charset"'
        ).catch((error: unknown) => {
            this.logger.error(
                `Error detecting host character set from stdout: ${error}`
            );
            done();
        });
    });
}
124+
125+
/**
 * Determines the host character set encoding. Queries it with an MI
 * 'show' of 'host-charset' first; if the lower-cased answer is 'auto',
 * the actual encoding is read from the interpreter console instead.
 *
 * @returns Lower-cased charset name, or undefined if the query failed or
 *     returned no value.
 */
private async getHostCharset(): Promise<string | undefined> {
    let reported: string | undefined;
    try {
        const response = await this.sendGDBShow('host-charset');
        // Lower case, as expected by the TextDecoder used later on
        reported = response?.value?.toLowerCase();
    } catch (error) {
        this.logger.error(`Error getting GDB host-charset: ${error}`);
        return undefined;
    }
    if (reported === 'auto') {
        // Only the console output reveals what 'auto' currently resolves to
        return await this.getAutoHostCharsetFromConsole();
    }
    // Either undefined or an explicit charset name
    return reported;
}
145+
146+
/**
 * Queries the current host charset and hands it to the MI parser, which
 * uses it to decode cstrings.
 */
private async updateCStringDecoder(): Promise<void> {
    const miParser = this.parser;
    miParser.hostCharset = await this.getHostCharset();
}
149+
60150
public async spawn(
61151
requestArgs: LaunchRequestArguments | AttachRequestArguments
62152
) {
@@ -92,6 +182,7 @@ export class GDBBackend extends events.EventEmitter implements IGDBBackend {
92182
this.asyncRequestedExplicitly = !!(
93183
requestArgs.gdbAsync || requestArgs.gdbNonStop
94184
);
185+
await this.updateCStringDecoder();
95186
await this.setNonStopMode(requestArgs.gdbNonStop);
96187
await this.setAsyncMode(requestArgs.gdbAsync);
97188
}
@@ -216,10 +307,21 @@ export class GDBBackend extends events.EventEmitter implements IGDBBackend {
216307
}
217308
}
218309

219-
public sendCommand<T>(command: string): Promise<T> {
310+
/**
 * Follow-up work after a GDB command: if the command matches one of
 * SET_HOSTCHARSET_REGEXPS, the cstring decoder is refreshed.
 * @param expression Command that was executed
 */
private async postProcessCommand(expression: string): Promise<void> {
    const changesHostCharset = SET_HOSTCHARSET_REGEXPS.some((pattern) =>
        pattern.test(expression)
    );
    if (!changesHostCharset) {
        return;
    }
    // Update GDB host charset info
    await this.updateCStringDecoder();
}
320+
321+
public async sendCommand<T>(command: string): Promise<T> {
220322
const token = this.nextToken();
221323
this.logger.verbose(`GDB command: ${token} ${command}`);
222-
return new Promise<T>((resolve, reject) => {
324+
const result = await new Promise<T>((resolve, reject) => {
223325
if (this.out) {
224326
/* Set error to capture the stack where the request originated,
225327
not the stack of reading the stream and parsing the message.
@@ -275,6 +377,9 @@ export class GDBBackend extends events.EventEmitter implements IGDBBackend {
275377
reject(new Error('gdb is not running.'));
276378
}
277379
});
380+
// Post-process command after successful execution
381+
await this.postProcessCommand(command);
382+
return result;
278383
}
279384

280385
public sendEnablePrettyPrint() {

0 commit comments

Comments
 (0)