Skip to content

Commit caa6e91

Browse files
committed
Add full support for HIGHCHARUNICODE
1 parent a3938fc commit caa6e91

File tree

3 files changed

+69
-20
lines changed

3 files changed

+69
-20
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- Full support for the `HIGHCHARUNICODE` compiler directive.
13+
1014
## [1.17.2] - 2025-07-03
1115

1216
### Fixed

delphi-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
import au.com.integradev.delphi.antlr.ast.visitors.DelphiParserVisitor;
2222
import au.com.integradev.delphi.preprocessor.TextBlockLineEndingMode;
23+
import java.nio.ByteBuffer;
24+
import java.nio.charset.Charset;
2325
import java.util.ArrayDeque;
2426
import java.util.Deque;
2527
import java.util.stream.Collectors;
@@ -28,6 +30,7 @@
2830
import org.apache.commons.lang3.Strings;
2931
import org.sonar.plugins.communitydelphi.api.ast.DelphiNode;
3032
import org.sonar.plugins.communitydelphi.api.ast.TextLiteralNode;
33+
import org.sonar.plugins.communitydelphi.api.directive.SwitchDirective.SwitchKind;
3134
import org.sonar.plugins.communitydelphi.api.token.DelphiTokenType;
3235
import org.sonar.plugins.communitydelphi.api.type.IntrinsicType;
3336
import org.sonar.plugins.communitydelphi.api.type.Type;
@@ -167,26 +170,38 @@ private String createSingleLineValue() {
167170
return imageBuilder.toString();
168171
}
169172

170-
private static char characterEscapeToChar(String image) {
173+
private boolean isHighCharUnicode() {
174+
return getAst()
175+
.getDelphiFile()
176+
.getCompilerSwitchRegistry()
177+
.isActiveSwitch(SwitchKind.HIGHCHARUNICODE, getTokenIndex());
178+
}
179+
180+
public Charset getAnsiCharset() {
181+
return Charset.forName(System.getProperty("native.encoding"));
182+
}
183+
184+
private char characterEscapeToChar(String image) {
171185
image = image.substring(1);
172186
int radix = 10;
173187

174-
switch (image.charAt(0)) {
175-
case '$':
176-
radix = 16;
177-
image = image.substring(1);
178-
break;
179-
case '%':
180-
radix = 2;
181-
image = image.substring(1);
182-
break;
183-
default:
184-
// do nothing
188+
if (image.charAt(0) == '$') {
189+
radix = 16;
190+
image = image.substring(1);
185191
}
186192

187193
image = StringUtils.remove(image, '_');
194+
char character = (char) Integer.parseInt(image, radix);
188195

189-
return (char) Integer.parseInt(image, radix);
196+
if (isHighCharUnicode() || character > 255) {
197+
// With HIGHCHARUNICODE ON, all escapes are interpreted as UTF-16.
198+
// Escapes above 255 are always interpreted as UTF-16.
199+
return character;
200+
} else {
201+
// With HIGHCHARUNICODE OFF, escapes in the range 0-255 are decoded as a single byte in the system (ANSI) code page.
202+
var buffer = ByteBuffer.allocate(1).put((byte) character).flip();
203+
return getAnsiCharset().decode(buffer).get();
204+
}
190205
}
191206

192207
@Override

delphi-frontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,24 @@
2020

2121
import static org.assertj.core.api.Assertions.assertThat;
2222
import static org.mockito.ArgumentMatchers.anyInt;
23+
import static org.mockito.ArgumentMatchers.eq;
2324
import static org.mockito.Mockito.mock;
25+
import static org.mockito.Mockito.spy;
2426
import static org.mockito.Mockito.when;
2527

2628
import au.com.integradev.delphi.antlr.DelphiLexer;
2729
import au.com.integradev.delphi.antlr.ast.DelphiAstImpl;
2830
import au.com.integradev.delphi.file.DelphiFile;
31+
import au.com.integradev.delphi.preprocessor.CompilerSwitchRegistry;
2932
import au.com.integradev.delphi.preprocessor.TextBlockLineEndingMode;
3033
import au.com.integradev.delphi.preprocessor.TextBlockLineEndingModeRegistry;
34+
import java.nio.charset.Charset;
3135
import org.antlr.runtime.CommonToken;
3236
import org.junit.jupiter.api.Test;
37+
import org.junit.jupiter.params.ParameterizedTest;
38+
import org.junit.jupiter.params.provider.ValueSource;
3339
import org.sonar.plugins.communitydelphi.api.ast.DelphiNode;
40+
import org.sonar.plugins.communitydelphi.api.directive.SwitchDirective.SwitchKind;
3441

3542
class TextLiteralNodeImplTest {
3643
@Test
@@ -59,22 +66,45 @@ void testMultilineImage() {
5966
assertThat(node.isMultiline()).isTrue();
6067
}
6168

62-
@Test
63-
void testGetImageWithCharacterEscapes() {
64-
TextLiteralNodeImpl node = new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral);
69+
@ParameterizedTest(name = "HIGHCHARUNICODE = {0}")
70+
@ValueSource(booleans = {true, false})
71+
void testGetImageWithCharacterEscapes(boolean highCharUnicode) {
72+
var registry = mock(CompilerSwitchRegistry.class);
73+
when(registry.isActiveSwitch(eq(SwitchKind.HIGHCHARUNICODE), anyInt()))
74+
.thenReturn(highCharUnicode);
75+
var file = mock(DelphiFile.class);
76+
when(file.getCompilerSwitchRegistry()).thenReturn(registry);
77+
var ast = mock(DelphiAstImpl.class);
78+
when(ast.getDelphiFile()).thenReturn(file);
79+
80+
TextLiteralNodeImpl node = spy(new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral));
81+
when(node.getAnsiCharset()).thenReturn(Charset.forName("windows-1252"));
82+
node.setParent(ast);
83+
6584
node.addChild(createNode(DelphiLexer.TkQuotedString, "'F'"));
6685
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111"));
6786
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111"));
6887
node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'"));
6988
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$61"));
7089
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$72"));
7190
node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'"));
72-
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01100001"));
73-
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01111010"));
91+
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$80"));
92+
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$98"));
93+
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$A3"));
94+
node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$20AC"));
95+
node.addChild(createNode(DelphiLexer.TkQuotedString, "'az'"));
7496

75-
assertThat(node.getImage()).isEqualTo("'F'#111#111'B'#$61#$72'B'#%01100001#%01111010");
76-
assertThat(node.getValue()).isEqualTo(node.getImageWithoutQuotes()).isEqualTo("FooBarBaz");
7797
assertThat(node.isMultiline()).isFalse();
98+
assertThat(node.getImage()).isEqualTo("'F'#111#111'B'#$61#$72'B'#$80#$98#$A3#$20AC'az'");
99+
if (highCharUnicode) {
100+
assertThat(node.getValue())
101+
.isEqualTo(node.getImageWithoutQuotes())
102+
.isEqualTo("FooBarB\u0080\u0098£€az");
103+
} else {
104+
assertThat(node.getValue())
105+
.isEqualTo(node.getImageWithoutQuotes())
106+
.isEqualTo("FooBarB€˜£€az");
107+
}
78108
}
79109

80110
@Test

0 commit comments

Comments
 (0)