Skip to content

Commit 64acd02

Browse files
authored
Merge pull request #6614 from Marcono1234/marcono1234/char-literal-codepoint
Java: Add `CharacterLiteral.getCodePointValue()`
2 parents da5d10f + 301a907 commit 64acd02

File tree

9 files changed

+79
-59
lines changed

9 files changed

+79
-59
lines changed

java/ql/lib/semmle/code/java/Expr.qll

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -298,18 +298,15 @@ class CompileTimeConstantExpr extends Expr {
298298
*
299299
* Note that this does not handle the following cases:
300300
*
301-
* - values of type `long`,
302-
* - `char` literals.
301+
* - values of type `long`.
303302
*/
304303
cached
305304
int getIntValue() {
306305
exists(IntegralType t | this.getType() = t | t.getName().toLowerCase() != "long") and
307306
(
308-
exists(string lit | lit = this.(Literal).getValue() |
309-
// `char` literals may get parsed incorrectly, so disallow.
310-
not this instanceof CharacterLiteral and
311-
result = lit.toInt()
312-
)
307+
result = this.(IntegerLiteral).getIntValue()
308+
or
309+
result = this.(CharacterLiteral).getCodePointValue()
313310
or
314311
exists(CastExpr cast, int val |
315312
cast = this and val = cast.getExpr().(CompileTimeConstantExpr).getIntValue()
@@ -719,6 +716,22 @@ class DoubleLiteral extends Literal, @doubleliteral {
719716
/** A character literal. For example, `'\n'`. */
720717
class CharacterLiteral extends Literal, @characterliteral {
721718
override string getAPrimaryQlClass() { result = "CharacterLiteral" }
719+
720+
/**
721+
* Gets a string which consists of the single character represented by
722+
* this literal.
723+
*
724+
* For Unicode surrogate characters (U+D800 to U+DFFF), the result is the
725+
* replacement character U+FFFD instead.
726+
*/
727+
override string getValue() { result = super.getValue() }
728+
729+
/**
730+
* Gets the Unicode code point value of the character represented by
731+
* this literal. The result is the same as if the Java code had cast
732+
* the character to an `int`, except for surrogate characters (U+D800 to U+DFFF), for which the result is 65533 (U+FFFD).
733+
*/
734+
int getCodePointValue() { result.toUnicode() = this.getValue() }
722735
}
723736

724737
/**

java/ql/lib/semmle/code/java/Type.qll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1123,7 +1123,10 @@ predicate erasedHaveIntersection(RefType t1, RefType t2) {
11231123
t2 = erase(_)
11241124
}
11251125

1126-
/** An integral type, which may be either a primitive or a boxed type. */
1126+
/**
1127+
* An integral type, which may be either a primitive or a boxed type.
1128+
* This includes the types `char` and `Character`.
1129+
*/
11271130
class IntegralType extends Type {
11281131
IntegralType() {
11291132
exists(string name |

java/ql/test/library-tests/constants/constants/Values.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ void values(final int notConstant) {
1616
int binary_literal = 0b101010; //42
1717
int negative_binary_literal = -0b101010; //-42
1818
int binary_literal_underscores = 0b1_0101_0; //42
19-
char char_literal = '*'; //Not handled
19+
char char_literal = '*'; //42
2020
long long_literal = 42L; //Not handled
2121
boolean boolean_literal = true; //true
2222
Integer boxed_int = new Integer(42); //Not handled
@@ -30,7 +30,7 @@ void values(final int notConstant) {
3030
byte downcast_byte_4 = (byte) 214; // -42
3131
byte downcast_byte_5 = (byte) (-214); // 42
3232
short downcast_short = (short) 32768; // -32768
33-
int cast_of_non_constant = (int) '*'; //Not handled
33+
int cast_of_non_constant = (int) '*'; //42
3434
long cast_to_long = (long) 42; //Not handled
3535

3636
int unary_plus = +42; //42

java/ql/test/library-tests/constants/getIntValue.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
| constants/Values.java:16:30:16:37 | 0b101010 | 42 |
1010
| constants/Values.java:17:39:17:47 | -... | -42 |
1111
| constants/Values.java:18:42:18:51 | 0b1_0101_0 | 42 |
12+
| constants/Values.java:19:29:19:31 | '*' | 42 |
1213
| constants/Values.java:25:20:25:27 | (...)... | 42 |
1314
| constants/Values.java:26:25:26:33 | (...)... | 42 |
1415
| constants/Values.java:27:32:27:43 | (...)... | -42 |
@@ -17,6 +18,7 @@
1718
| constants/Values.java:30:32:30:41 | (...)... | -42 |
1819
| constants/Values.java:31:32:31:44 | (...)... | 42 |
1920
| constants/Values.java:32:32:32:44 | (...)... | -32768 |
21+
| constants/Values.java:33:36:33:44 | (...)... | 42 |
2022
| constants/Values.java:36:26:36:28 | +... | 42 |
2123
| constants/Values.java:39:27:39:29 | -... | -42 |
2224
| constants/Values.java:43:27:43:28 | ~... | -1 |

java/ql/test/library-tests/literals/charLiterals/CharLiterals.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ public class CharLiterals {
1313
'\\',
1414
'\'',
1515
'\123', // octal escape sequence for 'S'
16+
// CodeQL uses U+FFFD for unpaired surrogates, see https://github.com/github/codeql/issues/6611
1617
'\uD800', // high surrogate
1718
'\uDC00', // low surrogate
1819
// Using Unicode escapes (which are handled during pre-processing)
Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
1-
| CharLiterals.java:5:3:5:5 | 'a' | a |
2-
| CharLiterals.java:6:3:6:10 | '\\u0061' | a |
3-
| CharLiterals.java:7:3:7:10 | '\\u0000' | \u0000 |
4-
| CharLiterals.java:8:3:8:10 | '\\uFFFF' | \uffff |
5-
| CharLiterals.java:9:3:9:10 | '\\ufFfF' | \uffff |
6-
| CharLiterals.java:10:3:10:6 | '\\0' | \u0000 |
7-
| CharLiterals.java:11:3:11:6 | '\\n' | \n |
8-
| CharLiterals.java:12:3:12:5 | '"' | " |
9-
| CharLiterals.java:13:3:13:6 | '\\\\' | \\ |
10-
| CharLiterals.java:14:3:14:6 | '\\'' | ' |
11-
| CharLiterals.java:15:3:15:8 | '\\123' | S |
12-
| CharLiterals.java:16:3:16:10 | '\\uD800' | \ufffd |
13-
| CharLiterals.java:17:3:17:10 | '\\uDC00' | \ufffd |
14-
| CharLiterals.java:19:3:19:16 | '\\u005C\\u005C' | \\ |
15-
| CharLiterals.java:20:3:20:16 | '\\u005C\\u0027' | ' |
16-
| CharLiterals.java:21:8:21:15 | 7a\\u0027 | a |
17-
| CharLiterals.java:26:4:26:6 | 'a' | a |
18-
| CharLiterals.java:27:4:27:6 | 'a' | a |
19-
| CharLiterals.java:32:3:32:5 | 'a' | a |
20-
| CharLiterals.java:32:9:32:11 | 'b' | b |
1+
| CharLiterals.java:5:3:5:5 | 'a' | a | 97 |
2+
| CharLiterals.java:6:3:6:10 | '\\u0061' | a | 97 |
3+
| CharLiterals.java:7:3:7:10 | '\\u0000' | \u0000 | 0 |
4+
| CharLiterals.java:8:3:8:10 | '\\uFFFF' | \uffff | 65535 |
5+
| CharLiterals.java:9:3:9:10 | '\\ufFfF' | \uffff | 65535 |
6+
| CharLiterals.java:10:3:10:6 | '\\0' | \u0000 | 0 |
7+
| CharLiterals.java:11:3:11:6 | '\\n' | \n | 10 |
8+
| CharLiterals.java:12:3:12:5 | '"' | " | 34 |
9+
| CharLiterals.java:13:3:13:6 | '\\\\' | \\ | 92 |
10+
| CharLiterals.java:14:3:14:6 | '\\'' | ' | 39 |
11+
| CharLiterals.java:15:3:15:8 | '\\123' | S | 83 |
12+
| CharLiterals.java:17:3:17:10 | '\\uD800' | \ufffd | 65533 |
13+
| CharLiterals.java:18:3:18:10 | '\\uDC00' | \ufffd | 65533 |
14+
| CharLiterals.java:20:3:20:16 | '\\u005C\\u005C' | \\ | 92 |
15+
| CharLiterals.java:21:3:21:16 | '\\u005C\\u0027' | ' | 39 |
16+
| CharLiterals.java:22:8:22:15 | 7a\\u0027 | a | 97 |
17+
| CharLiterals.java:27:4:27:6 | 'a' | a | 97 |
18+
| CharLiterals.java:28:4:28:6 | 'a' | a | 97 |
19+
| CharLiterals.java:33:3:33:5 | 'a' | a | 97 |
20+
| CharLiterals.java:33:9:33:11 | 'b' | b | 98 |
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
import semmle.code.java.Expr
22

33
from CharacterLiteral lit
4-
select lit, lit.getValue()
4+
select lit, lit.getValue(), lit.getCodePointValue()

java/ql/test/library-tests/literals/stringLiterals/StringLiterals.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ public class StringLiterals {
2424
"\uD800\uDC00", // surrogate pair
2525
"\uDBFF\uDFFF", // U+10FFFF
2626
// Unpaired surrogates
27+
// CodeQL uses U+FFFD for them, see https://github.com/github/codeql/issues/6611
2728
"\uD800",
2829
"\uDC00",
2930
"hello\uD800hello\uDC00world", // malformed surrogates

java/ql/test/library-tests/literals/stringLiterals/stringLiterals.expected

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,32 +17,32 @@
1717
| StringLiterals.java:23:3:23:18 | "\\uaBcDeF\\u0aB1" | \uabcdeF\u0ab1 | \uabcdeF\u0ab1 | |
1818
| StringLiterals.java:24:3:24:16 | "\\uD800\\uDC00" | \ud800\udc00 | \ud800\udc00 | |
1919
| StringLiterals.java:25:3:25:16 | "\\uDBFF\\uDFFF" | \udbff\udfff | \udbff\udfff | |
20-
| StringLiterals.java:27:3:27:10 | "\\uD800" | \ufffd | \ufffd | |
21-
| StringLiterals.java:28:3:28:10 | "\\uDC00" | \ufffd | \ufffd | |
22-
| StringLiterals.java:29:3:29:31 | "hello\\uD800hello\\uDC00world" | hello\ufffdhello\ufffdworld | hello\ufffdhello\ufffdworld | |
23-
| StringLiterals.java:31:3:31:16 | "\\u005C\\u0022" | " | " | |
24-
| StringLiterals.java:32:8:32:20 | 2\\u0061\\u0022 | a | a | |
25-
| StringLiterals.java:37:3:39:5 | """ \t \n\t\ttest "text" and escaped \\u0022\n\t\t""" | test "text" and escaped "\n | test "text" and escaped "\n | text-block |
26-
| StringLiterals.java:41:3:43:5 | """\n\t\t\tindented\n\t\t""" | \tindented\n | \tindented\n | text-block |
27-
| StringLiterals.java:44:3:46:5 | """\n\tno indentation last line\n\t\t""" | no indentation last line\n | no indentation last line\n | text-block |
28-
| StringLiterals.java:47:3:49:7 | """\n\tindentation last line\n\t\t\\s""" | indentation last line\n\t | indentation last line\n\t | text-block |
29-
| StringLiterals.java:50:3:52:6 | """\n\t\t\tnot-indented\n\t\t\t""" | not-indented\n | not-indented\n | text-block |
30-
| StringLiterals.java:53:3:55:4 | """\n\t\tindented\n\t""" | \tindented\n | \tindented\n | text-block |
31-
| StringLiterals.java:56:4:58:5 | """\n\t\tnot-indented\n\t\t""" | not-indented\n | not-indented\n | text-block |
32-
| StringLiterals.java:59:3:62:6 | """\n\t\t spaces (only single space is trimmed)\n\t\t\ttab\n\t\t\t""" | spaces (only single space is trimmed)\ntab\n | spaces (only single space is trimmed)\ntab\n | text-block |
33-
| StringLiterals.java:63:3:64:22 | """\n\t\t\tend on same line""" | end on same line | end on same line | text-block |
34-
| StringLiterals.java:65:3:68:5 | """\n\t\ttrailing spaces ignored: \t \n\t\tnot ignored: \t \\s\n\t\t""" | trailing spaces ignored:\nnot ignored: \t \n | trailing spaces ignored:\nnot ignored: \t \n | text-block |
35-
| StringLiterals.java:69:3:70:18 | """\n\t\t3 quotes:""\\"""" | 3 quotes:""" | 3 quotes:""" | text-block |
36-
| StringLiterals.java:71:3:74:5 | """\n\t\tline \\\n\t\tcontinuation \\\n\t\t""" | line continuation | line continuation | text-block |
37-
| StringLiterals.java:75:3:79:5 | """\n\t\tExplicit line breaks:\\n\n\t\t\\r\\n\n\t\t\\r\n\t\t""" | Explicit line breaks:\n\n\r\n\n\r\n | Explicit line breaks:\n\n\r\n\n\r\n | text-block |
38-
| StringLiterals.java:82:10:84:16 | 2"\\u0022\n\t\ttest\n\t\t\\u0022\\uu0022" | test\n | test\n | |
39-
| StringLiterals.java:90:3:90:19 | "hello" + "world" | helloworld | helloworld | |
40-
| StringLiterals.java:91:3:92:20 | """\n\t\thello""" + "world" | helloworld | helloworld | text-block |
41-
| StringLiterals.java:93:10:93:12 | "a" | a | a | |
42-
| StringLiterals.java:94:3:94:5 | "a" | a | a | |
20+
| StringLiterals.java:28:3:28:10 | "\\uD800" | \ufffd | \ufffd | |
21+
| StringLiterals.java:29:3:29:10 | "\\uDC00" | \ufffd | \ufffd | |
22+
| StringLiterals.java:30:3:30:31 | "hello\\uD800hello\\uDC00world" | hello\ufffdhello\ufffdworld | hello\ufffdhello\ufffdworld | |
23+
| StringLiterals.java:32:3:32:16 | "\\u005C\\u0022" | " | " | |
24+
| StringLiterals.java:33:8:33:20 | 2\\u0061\\u0022 | a | a | |
25+
| StringLiterals.java:38:3:40:5 | """ \t \n\t\ttest "text" and escaped \\u0022\n\t\t""" | test "text" and escaped "\n | test "text" and escaped "\n | text-block |
26+
| StringLiterals.java:42:3:44:5 | """\n\t\t\tindented\n\t\t""" | \tindented\n | \tindented\n | text-block |
27+
| StringLiterals.java:45:3:47:5 | """\n\tno indentation last line\n\t\t""" | no indentation last line\n | no indentation last line\n | text-block |
28+
| StringLiterals.java:48:3:50:7 | """\n\tindentation last line\n\t\t\\s""" | indentation last line\n\t | indentation last line\n\t | text-block |
29+
| StringLiterals.java:51:3:53:6 | """\n\t\t\tnot-indented\n\t\t\t""" | not-indented\n | not-indented\n | text-block |
30+
| StringLiterals.java:54:3:56:4 | """\n\t\tindented\n\t""" | \tindented\n | \tindented\n | text-block |
31+
| StringLiterals.java:57:4:59:5 | """\n\t\tnot-indented\n\t\t""" | not-indented\n | not-indented\n | text-block |
32+
| StringLiterals.java:60:3:63:6 | """\n\t\t spaces (only single space is trimmed)\n\t\t\ttab\n\t\t\t""" | spaces (only single space is trimmed)\ntab\n | spaces (only single space is trimmed)\ntab\n | text-block |
33+
| StringLiterals.java:64:3:65:22 | """\n\t\t\tend on same line""" | end on same line | end on same line | text-block |
34+
| StringLiterals.java:66:3:69:5 | """\n\t\ttrailing spaces ignored: \t \n\t\tnot ignored: \t \\s\n\t\t""" | trailing spaces ignored:\nnot ignored: \t \n | trailing spaces ignored:\nnot ignored: \t \n | text-block |
35+
| StringLiterals.java:70:3:71:18 | """\n\t\t3 quotes:""\\"""" | 3 quotes:""" | 3 quotes:""" | text-block |
36+
| StringLiterals.java:72:3:75:5 | """\n\t\tline \\\n\t\tcontinuation \\\n\t\t""" | line continuation | line continuation | text-block |
37+
| StringLiterals.java:76:3:80:5 | """\n\t\tExplicit line breaks:\\n\n\t\t\\r\\n\n\t\t\\r\n\t\t""" | Explicit line breaks:\n\n\r\n\n\r\n | Explicit line breaks:\n\n\r\n\n\r\n | text-block |
38+
| StringLiterals.java:83:10:85:16 | 2"\\u0022\n\t\ttest\n\t\t\\u0022\\uu0022" | test\n | test\n | |
39+
| StringLiterals.java:91:3:91:19 | "hello" + "world" | helloworld | helloworld | |
40+
| StringLiterals.java:92:3:93:20 | """\n\t\thello""" + "world" | helloworld | helloworld | text-block |
41+
| StringLiterals.java:94:10:94:12 | "a" | a | a | |
4342
| StringLiterals.java:95:3:95:5 | "a" | a | a | |
44-
| StringLiterals.java:96:7:96:9 | "a" | a | a | |
45-
| StringLiterals.java:97:3:97:5 | "a" | a | a | |
46-
| StringLiterals.java:98:10:98:12 | "a" | a | a | |
47-
| StringLiterals.java:99:3:99:5 | "a" | a | a | |
48-
| StringLiterals.java:100:9:100:11 | "a" | a | a | |
43+
| StringLiterals.java:96:3:96:5 | "a" | a | a | |
44+
| StringLiterals.java:97:7:97:9 | "a" | a | a | |
45+
| StringLiterals.java:98:3:98:5 | "a" | a | a | |
46+
| StringLiterals.java:99:10:99:12 | "a" | a | a | |
47+
| StringLiterals.java:100:3:100:5 | "a" | a | a | |
48+
| StringLiterals.java:101:9:101:11 | "a" | a | a | |

0 commit comments

Comments
 (0)