Skip to content

Commit 9bdcfc5

Browse files
committed
Fix issues handling high-precision numbers
The Double.isFinite method was incorrectly used to detect if a value would fit in a double. The method handles this properly, but it's not actually what we want - we want to know if the value will be accurately represented by a double. To resolve this, a NumberUtils class is added with a method that validates precision compatibility. Tests are added for this and all call sites to ensure the behavior remains consistent.
1 parent 4491592 commit 9bdcfc5

File tree

10 files changed

+371
-41
lines changed

10 files changed

+371
-41
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"type": "feature",
3+
"description": "Fix issues handling high-precision numbers",
4+
"pull_requests": [
5+
"[#3004](https://github.com/smithy-lang/smithy/pull/3004)"
6+
]
7+
}

smithy-jmespath/build.gradle.kts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,7 @@ description = "A standalone JMESPath parser"
1010

1111
extra["displayName"] = "Smithy :: JMESPath"
1212
extra["moduleName"] = "software.amazon.smithy.jmespath"
13+
14+
dependencies {
15+
api(project(":smithy-utils"))
16+
}

smithy-jmespath/src/main/java/software/amazon/smithy/jmespath/Lexer.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import java.util.function.Predicate;
1111
import software.amazon.smithy.jmespath.ast.LiteralExpression;
1212
import software.amazon.smithy.jmespath.evaluation.JmespathRuntime;
13+
import software.amazon.smithy.utils.NumberUtils;
1314

1415
final class Lexer<T> {
1516

@@ -468,12 +469,7 @@ private Token parseNumber() {
468469
String lexeme = sliceFrom(start);
469470

470471
try {
471-
Number number;
472-
if (lexeme.contains(".") || lexeme.toLowerCase().contains("e")) {
473-
number = Double.parseDouble(lexeme);
474-
} else {
475-
number = Long.parseLong(lexeme);
476-
}
472+
Number number = NumberUtils.parseNumber(lexeme);
477473
LiteralExpression node = new LiteralExpression(number, currentLine, currentColumn);
478474
return new Token(TokenType.NUMBER, node, currentLine, currentColumn);
479475
} catch (NumberFormatException e) {

smithy-jmespath/src/test/java/software/amazon/smithy/jmespath/LexerTest.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
import static org.hamcrest.Matchers.empty;
1111
import static org.hamcrest.Matchers.equalTo;
1212
import static org.hamcrest.Matchers.hasSize;
13+
import static org.hamcrest.Matchers.instanceOf;
1314
import static org.hamcrest.Matchers.is;
1415

16+
import java.math.BigDecimal;
1517
import java.util.ArrayList;
1618
import java.util.Arrays;
1719
import java.util.Collections;
@@ -542,6 +544,49 @@ public void parsesComplexNumbers() {
542544
assertThat(tokens.get(2).column, equalTo(25));
543545
}
544546

547+
@Test
548+
public void parsesDoublePrecisionCompatibleNumber() {
549+
List<Token> tokens = tokenize("1.5");
550+
551+
assertThat(tokens.get(0).type, equalTo(TokenType.NUMBER));
552+
Number value = tokens.get(0).value.expectNumberValue();
553+
assertThat(value, is(instanceOf(Double.class)));
554+
assertThat(value.doubleValue(), equalTo(1.5));
555+
}
556+
557+
@Test
558+
public void parsesDoublePrecisionIncompatibleNumberAsBigDecimal() {
559+
// 2^53 + 1 with a decimal triggers the float path but loses precision as a double.
560+
List<Token> tokens = tokenize("9007199254740993.0");
561+
562+
assertThat(tokens.get(0).type, equalTo(TokenType.NUMBER));
563+
Number value = tokens.get(0).value.expectNumberValue();
564+
assertThat(value, is(instanceOf(BigDecimal.class)));
565+
assertThat(value.toString(), equalTo("9007199254740993.0"));
566+
}
567+
568+
@Test
569+
public void parsesOverflowPositiveDoubleAsBigDecimal() {
570+
// 1e309 overflows Double to +Infinity, but is a valid BigDecimal.
571+
List<Token> tokens = tokenize("1e309");
572+
573+
assertThat(tokens.get(0).type, equalTo(TokenType.NUMBER));
574+
Number value = tokens.get(0).value.expectNumberValue();
575+
assertThat(value, is(instanceOf(BigDecimal.class)));
576+
assertThat(value, equalTo(new BigDecimal("1e309")));
577+
}
578+
579+
@Test
580+
public void parsesOverflowNegativeDoubleAsBigDecimal() {
581+
// -1e309 overflows Double to -Infinity, but is a valid BigDecimal.
582+
List<Token> tokens = tokenize("-1e309");
583+
584+
assertThat(tokens.get(0).type, equalTo(TokenType.NUMBER));
585+
Number value = tokens.get(0).value.expectNumberValue();
586+
assertThat(value, is(instanceOf(BigDecimal.class)));
587+
assertThat(value, equalTo(new BigDecimal("-1e309")));
588+
}
589+
545590
@Test
546591
public void detectsTopLevelInvalidSyntax() {
547592
Assertions.assertThrows(JmespathException.class, () -> Lexer.tokenize("~"));

smithy-model/src/main/java/software/amazon/smithy/model/loader/DefaultTokenizer.java

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
*/
55
package software.amazon.smithy.model.loader;
66

7-
import java.math.BigDecimal;
8-
import java.math.BigInteger;
97
import java.util.NoSuchElementException;
108
import software.amazon.smithy.model.SourceLocation;
9+
import software.amazon.smithy.utils.NumberUtils;
1110
import software.amazon.smithy.utils.SimpleParser;
1211

1312
class DefaultTokenizer implements IdlTokenizer {
@@ -318,22 +317,7 @@ private IdlToken parseComment() {
318317

319318
private IdlToken parseNumber() {
320319
try {
321-
String lexeme = ParserUtils.parseNumber(parser);
322-
if (lexeme.contains("e") || lexeme.contains("E") || lexeme.contains(".")) {
323-
double value = Double.parseDouble(lexeme);
324-
if (Double.isFinite(value)) {
325-
currentTokenNumber = value;
326-
} else {
327-
currentTokenNumber = new BigDecimal(lexeme);
328-
}
329-
} else {
330-
try {
331-
currentTokenNumber = Long.parseLong(lexeme);
332-
} catch (NumberFormatException e) {
333-
currentTokenNumber = new BigInteger(lexeme);
334-
}
335-
}
336-
320+
currentTokenNumber = NumberUtils.parseNumber(ParserUtils.parseNumber(parser));
337321
currentTokenEnd = parser.position();
338322
return currentTokenType = IdlToken.NUMBER;
339323
} catch (RuntimeException e) {

smithy-model/src/main/java/software/amazon/smithy/model/node/internal/NodeHandler.java

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66

77
import java.io.StringWriter;
88
import java.io.Writer;
9-
import java.math.BigDecimal;
10-
import java.math.BigInteger;
119
import software.amazon.smithy.model.SourceLocation;
1210
import software.amazon.smithy.model.node.ArrayNode;
1311
import software.amazon.smithy.model.node.BooleanNode;
@@ -16,6 +14,7 @@
1614
import software.amazon.smithy.model.node.NumberNode;
1715
import software.amazon.smithy.model.node.ObjectNode;
1816
import software.amazon.smithy.model.node.StringNode;
17+
import software.amazon.smithy.utils.NumberUtils;
1918
import software.amazon.smithy.utils.SmithyInternalApi;
2019

2120
@SmithyInternalApi
@@ -69,21 +68,7 @@ void endString(String string, SourceLocation location) {
6968

7069
@Override
7170
void endNumber(String string, SourceLocation location) {
72-
if (string.contains("e") || string.contains("E") || string.contains(".")) {
73-
double doubleValue = Double.parseDouble(string);
74-
if (Double.isFinite(doubleValue)) {
75-
value = new NumberNode(doubleValue, location);
76-
} else {
77-
value = new NumberNode(new BigDecimal(string), location);
78-
}
79-
} else {
80-
try {
81-
value = new NumberNode(Long.parseLong(string), location);
82-
} catch (NumberFormatException e) {
83-
value = new NumberNode(new BigInteger(string), location);
84-
}
85-
86-
}
71+
value = new NumberNode(NumberUtils.parseNumber(string), location);
8772
}
8873

8974
@Override

smithy-model/src/test/java/software/amazon/smithy/model/loader/TokenizerTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import static org.hamcrest.MatcherAssert.assertThat;
88
import static org.hamcrest.Matchers.equalTo;
9+
import static org.hamcrest.Matchers.instanceOf;
910
import static org.hamcrest.Matchers.is;
1011

1112
import java.util.NoSuchElementException;
@@ -199,6 +200,49 @@ public void storesCurrentTokenNumber() {
199200
assertThat(tokenizer.getCurrentTokenNumberValue(), equalTo(10L));
200201
}
201202

203+
@Test
204+
public void parsesDoublePrecisionCompatibleFloat() {
205+
IdlTokenizer tokenizer = IdlTokenizer.create("1.5");
206+
207+
assertThat(tokenizer.next(), is(IdlToken.NUMBER));
208+
Number value = tokenizer.getCurrentTokenNumberValue();
209+
assertThat(value, instanceOf(Double.class));
210+
assertThat(value.doubleValue(), equalTo(1.5));
211+
}
212+
213+
@Test
214+
public void parsesDoublePrecisionIncompatibleFloatAsBigDecimal() {
215+
// 2^53 + 1 with a decimal triggers the float path but loses precision as a double.
216+
IdlTokenizer tokenizer = IdlTokenizer.create("9007199254740993.0");
217+
218+
assertThat(tokenizer.next(), is(IdlToken.NUMBER));
219+
Number value = tokenizer.getCurrentTokenNumberValue();
220+
assertThat(value, instanceOf(java.math.BigDecimal.class));
221+
assertThat(value.toString(), equalTo("9007199254740993.0"));
222+
}
223+
224+
@Test
225+
public void parsesOverflowPositiveDoubleAsBigDecimal() {
226+
// 1e309 overflows Double to +Infinity, but is a valid BigDecimal.
227+
IdlTokenizer tokenizer = IdlTokenizer.create("1e309");
228+
229+
assertThat(tokenizer.next(), is(IdlToken.NUMBER));
230+
Number value = tokenizer.getCurrentTokenNumberValue();
231+
assertThat(value, instanceOf(java.math.BigDecimal.class));
232+
assertThat(value, equalTo(new java.math.BigDecimal("1e309")));
233+
}
234+
235+
@Test
236+
public void parsesOverflowNegativeDoubleAsBigDecimal() {
237+
// -1e309 overflows Double to -Infinity, but is a valid BigDecimal.
238+
IdlTokenizer tokenizer = IdlTokenizer.create("-1e309");
239+
240+
assertThat(tokenizer.next(), is(IdlToken.NUMBER));
241+
Number value = tokenizer.getCurrentTokenNumberValue();
242+
assertThat(value, instanceOf(java.math.BigDecimal.class));
243+
assertThat(value, equalTo(new java.math.BigDecimal("-1e309")));
244+
}
245+
202246
@Test
203247
public void throwsWhenAccessingStringValue() {
204248
IdlTokenizer tokenizer = IdlTokenizer.create("10");

smithy-model/src/test/java/software/amazon/smithy/model/node/NodeParserTest.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66

77
import static org.hamcrest.MatcherAssert.assertThat;
88
import static org.hamcrest.Matchers.equalTo;
9+
import static org.hamcrest.Matchers.instanceOf;
910
import static org.hamcrest.Matchers.is;
1011
import static org.hamcrest.Matchers.startsWith;
1112

13+
import java.math.BigDecimal;
1214
import java.util.Map;
1315
import org.junit.jupiter.api.Assertions;
1416
import org.junit.jupiter.api.Test;
@@ -69,11 +71,49 @@ public void parsesFloatNode() {
6971
Node result = Node.parse("1.5");
7072

7173
assertThat(result.isNumberNode(), is(true));
74+
assertThat(result.expectNumberNode().getValue(), is(instanceOf(Double.class)));
7275
assertThat(result.expectNumberNode().getValue(), equalTo(1.5));
7376
assertThat(result.getSourceLocation().getLine(), is(1));
7477
assertThat(result.getSourceLocation().getColumn(), is(1));
7578
}
7679

80+
@Test
81+
public void parsesDoublePrecisionIncompatibleNumberAsBigDecimal() {
82+
// 2^53 + 1 with a decimal triggers the float path but loses precision as a double.
83+
Node result = Node.parse("9007199254740993.0");
84+
assertThat(result.isNumberNode(), is(true));
85+
86+
NumberNode numberNode = result.expectNumberNode();
87+
assertThat(numberNode.getValue(), is(instanceOf(BigDecimal.class)));
88+
assertThat(numberNode.getValue().toString(), equalTo("9007199254740993.0"));
89+
assertThat(numberNode.getSourceLocation().getLine(), is(1));
90+
assertThat(numberNode.getSourceLocation().getColumn(), is(1));
91+
}
92+
93+
@Test
94+
public void parsesOverflowPositiveDoubleAsBigDecimal() {
95+
// 1e309 overflows Double to +Infinity, but is a valid BigDecimal.
96+
Node result = Node.parse("1e309");
97+
98+
NumberNode numberNode = result.expectNumberNode();
99+
assertThat(numberNode.getValue(), is(instanceOf(BigDecimal.class)));
100+
assertThat(numberNode.getValue(), equalTo(new BigDecimal("1e309")));
101+
assertThat(numberNode.getSourceLocation().getLine(), is(1));
102+
assertThat(numberNode.getSourceLocation().getColumn(), is(1));
103+
}
104+
105+
@Test
106+
public void parsesOverflowNegativeDoubleAsBigDecimal() {
107+
// -1e309 overflows Double to -Infinity, but is a valid BigDecimal.
108+
Node result = Node.parse("-1e309");
109+
110+
NumberNode numberNode = result.expectNumberNode();
111+
assertThat(numberNode.getValue(), is(instanceOf(BigDecimal.class)));
112+
assertThat(numberNode.getValue(), equalTo(new BigDecimal("-1e309")));
113+
assertThat(numberNode.getSourceLocation().getLine(), is(1));
114+
assertThat(numberNode.getSourceLocation().getColumn(), is(1));
115+
}
116+
77117
@Test
78118
public void parsesEmptyArray() {
79119
Node result = Node.parse("[]");
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package software.amazon.smithy.utils;
6+
7+
import java.math.BigDecimal;
8+
import java.math.BigInteger;
9+
10+
/**
11+
* Functions that make working with numbers easier.
12+
*/
13+
public final class NumberUtils {
14+
private NumberUtils() {}
15+
16+
/**
17+
* Checks whether the bigDecimalValue is represented in the doubleValue without
18+
* a loss of precision due to IEEE-754 floating-point representation.
19+
*
20+
* @param bigDecimalValue The value as a BigDecimal.
21+
* @param doubleValue The value as a double.
22+
* @return true if the bigDecimalValue is precisely represented by the doubleValue.
23+
*/
24+
public static boolean isDoublePrecisionCompatible(BigDecimal bigDecimalValue, Double doubleValue) {
25+
return bigDecimalValue.compareTo(BigDecimal.valueOf(doubleValue)) == 0;
26+
}
27+
28+
/**
29+
* Parses a decimal (floating-point or scientific notation) string into the
30+
* most precise Number type that preserves the value.
31+
*
32+
* <p>Returns a {@link Double} when the value can be represented without
33+
* precision loss. Returns a {@link BigDecimal} when the value exceeds
34+
* double range (would become infinity) or loses precision in IEEE-754.
35+
*
36+
* @param lexeme A numeric string containing "." or "e"/"E".
37+
* @return A Double or BigDecimal representing the parsed value.
38+
*/
39+
public static Number parseDecimalNumber(String lexeme) {
40+
Double doubleValue = Double.valueOf(lexeme);
41+
if (doubleValue.isNaN()) {
42+
return doubleValue;
43+
}
44+
if (lexeme.contains("I")) {
45+
return doubleValue;
46+
}
47+
if (doubleValue.isInfinite()) {
48+
// Doubles parse values outside their range as infinity.
49+
// Literal infinity is handled by the `I` test, so parse the real value.
50+
return new BigDecimal(lexeme);
51+
}
52+
BigDecimal bigDecimalValue = new BigDecimal(lexeme);
53+
if (isDoublePrecisionCompatible(bigDecimalValue, doubleValue)) {
54+
return doubleValue;
55+
}
56+
return bigDecimalValue;
57+
}
58+
59+
/**
60+
* Parses a numeric string into the most appropriate Number type.
61+
*
62+
* <p>Strings containing ".", "e", or "E" are treated as decimal numbers
63+
* and parsed via {@link #parseDecimalNumber}. All other strings are
64+
* parsed as integers, trying {@link Long} first and falling back to
65+
* {@link BigInteger} for values that overflow long.
66+
*
67+
* @param lexeme A numeric string.
68+
* @return A Long, BigInteger, Double, or BigDecimal representing the value.
69+
*/
70+
public static Number parseNumber(String lexeme) {
71+
if (lexeme.contains("e") || lexeme.contains("E") || lexeme.contains(".")) {
72+
return parseDecimalNumber(lexeme);
73+
}
74+
try {
75+
return Long.parseLong(lexeme);
76+
} catch (NumberFormatException e) {
77+
return new BigInteger(lexeme);
78+
}
79+
}
80+
}

0 commit comments

Comments
 (0)