From 6c8013a736218d43542c6a652fe30c9e03b281a4 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Fri, 1 Aug 2025 20:14:59 +0500 Subject: [PATCH 01/17] Add ScalarLiteralNode --- lldb/include/lldb/ValueObject/DILAST.h | 23 +++++++++ lldb/include/lldb/ValueObject/DILEval.h | 2 + lldb/include/lldb/ValueObject/DILParser.h | 2 + lldb/source/ValueObject/DILAST.cpp | 5 ++ lldb/source/ValueObject/DILEval.cpp | 28 +++++++++++ lldb/source/ValueObject/DILParser.cpp | 48 +++++++++++++++++++ .../TestFrameVarDILArraySubscript.py | 2 +- .../Indirection/TestFrameVarDILIndirection.py | 2 +- 8 files changed, 110 insertions(+), 2 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h index 709f0639135f1..a174e28ea5c06 100644 --- a/lldb/include/lldb/ValueObject/DILAST.h +++ b/lldb/include/lldb/ValueObject/DILAST.h @@ -23,6 +23,7 @@ enum class NodeKind { eErrorNode, eIdentifierNode, eMemberOfNode, + eScalarLiteralNode, eUnaryOpNode, }; @@ -178,6 +179,26 @@ class BitFieldExtractionNode : public ASTNode { int64_t m_last_index; }; +class ScalarLiteralNode : public ASTNode { +public: + ScalarLiteralNode(uint32_t location, lldb::BasicType type, Scalar value) + : ASTNode(location, NodeKind::eScalarLiteralNode), m_type(type), + m_value(value) {} + + llvm::Expected Accept(Visitor *v) const override; + + lldb::BasicType GetType() const { return m_type; } + Scalar GetValue() const & { return m_value; } + + static bool classof(const ASTNode *node) { + return node->GetKind() == NodeKind::eScalarLiteralNode; + } + +private: + lldb::BasicType m_type; + Scalar m_value; +}; + /// This class contains one Visit method for each specialized type of /// DIL AST node. 
The Visit methods are used to dispatch a DIL AST node to /// the correct function in the DIL expression evaluator for evaluating that @@ -195,6 +216,8 @@ class Visitor { Visit(const ArraySubscriptNode *node) = 0; virtual llvm::Expected Visit(const BitFieldExtractionNode *node) = 0; + virtual llvm::Expected + Visit(const ScalarLiteralNode *node) = 0; }; } // namespace lldb_private::dil diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h index 45e29b3ddcd7b..cb2a81d1c7ba1 100644 --- a/lldb/include/lldb/ValueObject/DILEval.h +++ b/lldb/include/lldb/ValueObject/DILEval.h @@ -54,6 +54,8 @@ class Interpreter : Visitor { Visit(const ArraySubscriptNode *node) override; llvm::Expected Visit(const BitFieldExtractionNode *node) override; + llvm::Expected + Visit(const ScalarLiteralNode *node) override; // Used by the interpreter to create objects, perform casts, etc. lldb::TargetSP m_target; diff --git a/lldb/include/lldb/ValueObject/DILParser.h b/lldb/include/lldb/ValueObject/DILParser.h index 9eda7bac4a364..2cd8ca3be3c02 100644 --- a/lldb/include/lldb/ValueObject/DILParser.h +++ b/lldb/include/lldb/ValueObject/DILParser.h @@ -96,6 +96,8 @@ class DILParser { std::string ParseIdExpression(); std::string ParseUnqualifiedId(); std::optional ParseIntegerConstant(); + ASTNodeUP ParseNumericLiteral(); + ASTNodeUP ParseNumericConstant(); void BailOut(const std::string &error, uint32_t loc, uint16_t err_len); diff --git a/lldb/source/ValueObject/DILAST.cpp b/lldb/source/ValueObject/DILAST.cpp index b1cd824c2299e..38215ae18f6ce 100644 --- a/lldb/source/ValueObject/DILAST.cpp +++ b/lldb/source/ValueObject/DILAST.cpp @@ -37,4 +37,9 @@ BitFieldExtractionNode::Accept(Visitor *v) const { return v->Visit(this); } +llvm::Expected +ScalarLiteralNode::Accept(Visitor *v) const { + return v->Visit(this); +} + } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index 
3ac200228acfd..ff2fc949e3d10 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "lldb/ValueObject/DILEval.h" +#include "lldb/Core/Module.h" #include "lldb/Symbol/CompileUnit.h" +#include "lldb/Symbol/TypeSystem.h" #include "lldb/Symbol/VariableList.h" #include "lldb/Target/RegisterContext.h" #include "lldb/ValueObject/DILAST.h" @@ -497,4 +499,30 @@ Interpreter::Visit(const BitFieldExtractionNode *node) { return child_valobj_sp; } +static CompilerType GetBasicTypeFromCU(std::shared_ptr ctx, + lldb::BasicType basic_type) { + SymbolContext symbol_context = + ctx->GetSymbolContext(lldb::eSymbolContextCompUnit); + auto language = symbol_context.comp_unit->GetLanguage(); + + symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextModule); + auto type_system = + symbol_context.module_sp->GetTypeSystemForLanguage(language); + + if (type_system) + if (auto compiler_type = type_system.get()->GetBasicTypeFromAST(basic_type)) + return compiler_type; + + return CompilerType(); +} + +llvm::Expected +Interpreter::Visit(const ScalarLiteralNode *node) { + CompilerType result_type = + GetBasicTypeFromCU(m_exe_ctx_scope, node->GetType()); + Scalar value = node->GetValue(); + return ValueObject::CreateValueObjectFromScalar(m_target, value, result_type, + "result"); +} + } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index eac41fab90763..91b9d764527b3 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -183,6 +183,8 @@ ASTNodeUP DILParser::ParsePostfixExpression() { // "(" expression ")" // ASTNodeUP DILParser::ParsePrimaryExpression() { + if (CurToken().Is(Token::numeric_constant)) + return ParseNumericLiteral(); if (CurToken().IsOneOf( {Token::coloncolon, Token::identifier, Token::l_paren})) { // Save the source 
location for the diagnostics message. @@ -370,6 +372,52 @@ std::optional DILParser::ParseIntegerConstant() { return std::nullopt; } +// Parse a numeric_literal. +// +// numeric_literal: +// ? Token::numeric_constant ? +// +ASTNodeUP DILParser::ParseNumericLiteral() { + Expect(Token::numeric_constant); + ASTNodeUP numeric_constant = ParseNumericConstant(); + if (numeric_constant->GetKind() == NodeKind::eErrorNode) { + BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}", + CurToken()), + CurToken().GetLocation(), CurToken().GetSpelling().length()); + return std::make_unique(); + } + m_dil_lexer.Advance(); + return numeric_constant; +} + +static constexpr std::pair type_suffixes[] = { + {"ull", lldb::eBasicTypeUnsignedLongLong}, + {"ul", lldb::eBasicTypeUnsignedLong}, + {"u", lldb::eBasicTypeUnsignedInt}, + {"ll", lldb::eBasicTypeLongLong}, + {"l", lldb::eBasicTypeLong}, +}; + +ASTNodeUP DILParser::ParseNumericConstant() { + Token token = CurToken(); + auto spelling = token.GetSpelling(); + llvm::StringRef spelling_ref = spelling; + lldb::BasicType type = lldb::eBasicTypeInt; + for (auto [suffix, t] : type_suffixes) { + if (spelling_ref.consume_back_insensitive(suffix)) { + type = t; + break; + } + } + llvm::APInt raw_value; + if (!spelling_ref.getAsInteger(0, raw_value)) { + Scalar scalar_value(raw_value); + return std::make_unique(token.GetLocation(), type, + scalar_value); + } + return std::make_unique(); +} + void DILParser::Expect(Token::Kind kind) { if (CurToken().IsNot(kind)) { BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()), diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py index e3cfb878dd415..9dcd2fe85aa5b 100644 --- a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py +++ 
b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py @@ -89,7 +89,7 @@ def test_subscript(self): self.expect( "frame var '1[2]'", error=True, - substrs=["Unexpected token"], + substrs=["subscripted value is not an array or pointer"], ) # Base should not be a pointer to void diff --git a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py index 38c72131d797c..28eba4f1a70bc 100644 --- a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py +++ b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py @@ -35,7 +35,7 @@ def test_frame_var(self): self.expect( "frame variable '*1'", error=True, - substrs=["Unexpected token: <'1' (numeric_constant)>"], + substrs=["dereference failed: not a pointer, reference or array type"], ) self.expect( "frame variable '*val'", From b355c88c5e2995e02962d0a748a0e4e39796487c Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Fri, 18 Jul 2025 21:52:53 +0500 Subject: [PATCH 02/17] Lex and parse floating numbers; split number tokens to avoid double parsing --- lldb/docs/dil-expr-lang.ebnf | 3 ++ lldb/include/lldb/ValueObject/DILLexer.h | 3 +- lldb/include/lldb/ValueObject/DILParser.h | 3 +- lldb/source/ValueObject/DILLexer.cpp | 54 +++++++++++++++---- lldb/source/ValueObject/DILParser.cpp | 37 +++++++++++-- .../TestFrameVarDILArraySubscript.py | 2 +- .../frame/var-dil/expr/Arithmetic/Makefile | 3 ++ .../Arithmetic/TestFrameVarDILArithmetic.py | 30 +++++++++++ .../frame/var-dil/expr/Arithmetic/main.cpp | 3 ++ lldb/unittests/ValueObject/DILLexerTests.cpp | 21 +++++--- 10 files changed, 134 insertions(+), 25 deletions(-) create mode 100644 lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile create mode 100644 lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py create mode 100644 
lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf index 783432dabd6db..da1796d936c6a 100644 --- a/lldb/docs/dil-expr-lang.ebnf +++ b/lldb/docs/dil-expr-lang.ebnf @@ -31,6 +31,9 @@ identifier = ? C99 Identifier ? ; integer_literal = ? Integer constant: hexademical, decimal, octal, binary ? ; +numeric_literal = ? Integer constant: hexadecimal, decimal, octal, binary ? + | ? Floating constant ? ; + register = "$" ? Register name ? ; nested_name_specifier = type_name "::" diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h index 9c1ba97680253..a9f01785c6c20 100644 --- a/lldb/include/lldb/ValueObject/DILLexer.h +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -28,11 +28,12 @@ class Token { arrow, coloncolon, eof, + floating_constant, identifier, + integer_constant, l_paren, l_square, minus, - numeric_constant, period, r_paren, r_square, diff --git a/lldb/include/lldb/ValueObject/DILParser.h b/lldb/include/lldb/ValueObject/DILParser.h index 2cd8ca3be3c02..90df109337dcf 100644 --- a/lldb/include/lldb/ValueObject/DILParser.h +++ b/lldb/include/lldb/ValueObject/DILParser.h @@ -97,7 +97,8 @@ class DILParser { std::string ParseUnqualifiedId(); std::optional ParseIntegerConstant(); ASTNodeUP ParseNumericLiteral(); - ASTNodeUP ParseNumericConstant(); + ASTNodeUP ParseIntegerLiteral(); + ASTNodeUP ParseFloatingPointLiteral(); void BailOut(const std::string &error, uint32_t loc, uint16_t err_len); diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index eaefaf484bc18..00f9a0c515461 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -28,16 +28,18 @@ llvm::StringRef Token::GetTokenName(Kind kind) { return "coloncolon"; case Kind::eof: return "eof"; + case Kind::floating_constant: + return "floating_constant"; case Kind::identifier: return "identifier"; + case
Kind::integer_constant: + return "integer_constant"; case Kind::l_paren: return "l_paren"; case Kind::l_square: return "l_square"; case Kind::minus: return "minus"; - case Kind::numeric_constant: - return "numeric_constant"; case Kind::period: return "period"; case Kind::r_paren: @@ -72,12 +74,39 @@ static std::optional IsWord(llvm::StringRef expr, static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); } -static std::optional IsNumber(llvm::StringRef expr, - llvm::StringRef &remainder) { - if (IsDigit(remainder[0])) { - llvm::StringRef number = remainder.take_while(IsNumberBodyChar); - remainder = remainder.drop_front(number.size()); - return number; +static std::optional IsNumber(llvm::StringRef &remainder, + bool &isFloat) { + llvm::StringRef::iterator cur_pos = remainder.begin(); + if (*cur_pos == '.') { + auto next_pos = cur_pos + 1; + if (next_pos == remainder.end() || !IsDigit(*next_pos)) + return std::nullopt; + } + if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') { + while (IsNumberBodyChar(*cur_pos)) + cur_pos++; + + if (*cur_pos == '.') { + isFloat = true; + cur_pos++; + while (IsNumberBodyChar(*cur_pos)) + cur_pos++; + + // Check if there's an exponent + char prev_ch = *(cur_pos - 1); + if (prev_ch == 'e' || prev_ch == 'E' || prev_ch == 'p' || + prev_ch == 'P') { + if (*(cur_pos) == '+' || *(cur_pos) == '-') { + cur_pos++; + while (IsNumberBodyChar(*cur_pos)) + cur_pos++; + } + } + } + + llvm::StringRef number = remainder.substr(0, cur_pos - remainder.begin()); + if (remainder.consume_front(number)) + return number; } return std::nullopt; } @@ -106,9 +135,12 @@ llvm::Expected DILLexer::Lex(llvm::StringRef expr, return Token(Token::eof, "", (uint32_t)expr.size()); uint32_t position = cur_pos - expr.begin(); - std::optional maybe_number = IsNumber(expr, remainder); - if (maybe_number) - return Token(Token::numeric_constant, maybe_number->str(), position); + bool isFloat = false; + std::optional maybe_number = IsNumber(remainder, isFloat); 
+ if (maybe_number) { + auto kind = isFloat ? Token::floating_constant : Token::integer_constant; + return Token(kind, maybe_number->str(), position); + } std::optional maybe_word = IsWord(expr, remainder); if (maybe_word) return Token(Token::identifier, maybe_word->str(), position); diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index 91b9d764527b3..eee4a7bc4d363 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -179,11 +179,12 @@ ASTNodeUP DILParser::ParsePostfixExpression() { // Parse a primary_expression. // // primary_expression: +// numeric_literal // id_expression // "(" expression ")" // ASTNodeUP DILParser::ParsePrimaryExpression() { - if (CurToken().Is(Token::numeric_constant)) + if (CurToken().IsOneOf({Token::integer_constant, Token::floating_constant})) return ParseNumericLiteral(); if (CurToken().IsOneOf( {Token::coloncolon, Token::identifier, Token::l_paren})) { @@ -348,6 +349,7 @@ void DILParser::BailOut(const std::string &error, uint32_t loc, m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1); } +// FIXME: Remove this once subscript operator uses ScalarLiteralNode. // Parse a integer_literal. // // integer_literal: @@ -375,11 +377,15 @@ std::optional DILParser::ParseIntegerConstant() { // Parse a numeric_literal. // // numeric_literal: -// ? Token::numeric_constant ? +// ? Token::integer_constant ? +// ? Token::floating_constant ? 
// ASTNodeUP DILParser::ParseNumericLiteral() { - Expect(Token::numeric_constant); - ASTNodeUP numeric_constant = ParseNumericConstant(); + ASTNodeUP numeric_constant; + if (CurToken().Is(Token::integer_constant)) + numeric_constant = ParseIntegerLiteral(); + else + numeric_constant = ParseFloatingPointLiteral(); if (numeric_constant->GetKind() == NodeKind::eErrorNode) { BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}", CurToken()), @@ -398,7 +404,7 @@ static constexpr std::pair type_suffixes[] = { {"l", lldb::eBasicTypeLong}, }; -ASTNodeUP DILParser::ParseNumericConstant() { +ASTNodeUP DILParser::ParseIntegerLiteral() { Token token = CurToken(); auto spelling = token.GetSpelling(); llvm::StringRef spelling_ref = spelling; @@ -418,6 +424,27 @@ ASTNodeUP DILParser::ParseNumericConstant() { return std::make_unique(); } +ASTNodeUP DILParser::ParseFloatingPointLiteral() { + Token token = CurToken(); + auto spelling = token.GetSpelling(); + llvm::StringRef spelling_ref = spelling; + spelling_ref = spelling; + lldb::BasicType type = lldb::eBasicTypeDouble; + llvm::APFloat raw_float(llvm::APFloat::IEEEdouble()); + if (spelling_ref.consume_back_insensitive("f")) { + type = lldb::eBasicTypeFloat; + raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle()); + } + auto StatusOrErr = raw_float.convertFromString( + spelling_ref, llvm::APFloat::rmNearestTiesToEven); + if (!errorToBool(StatusOrErr.takeError())) { + Scalar scalar_value(raw_float); + return std::make_unique(token.GetLocation(), type, + scalar_value); + } + return std::make_unique(); +} + void DILParser::Expect(Token::Kind kind) { if (CurToken().IsNot(kind)) { BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()), diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py index 9dcd2fe85aa5b..9c0d9ec252728 100644 --- 
a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py +++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py @@ -66,7 +66,7 @@ def test_subscript(self): self.expect( "frame var 'int_arr[1.0]'", error=True, - substrs=["expected 'r_square', got: <'.'"], + substrs=["failed to parse integer constant: <'1.0' (floating_constant)>"], ) # Test accessing bits in scalar types. diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py new file mode 100644 index 0000000000000..883781873a525 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py @@ -0,0 +1,30 @@ +""" +Test DIL arithmetic. 
+""" + +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test import lldbutil + + +class TestFrameVarDILArithmetic(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test_arithmetic(self): + self.build() + lldbutil.run_to_source_breakpoint( + self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") + ) + + self.runCmd("settings set target.experimental.use-DIL true") + + # Check number parsing + self.expect_var_path("1", value="1", type="int") + self.expect_var_path("1ull", value="1", type="unsigned long long") + self.expect_var_path("0b10", value="2", type="int") + self.expect_var_path("010", value="8", type="int") + self.expect_var_path("0x10", value="16", type="int") + self.expect_var_path("1.0", value="1", type="double") + self.expect_var_path("1.0f", value="1", type="float") + self.expect_var_path("0x1.2p+3f", value="9", type="float") diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp new file mode 100644 index 0000000000000..c9bd8afb0d71d --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp @@ -0,0 +1,3 @@ +int main(int argc, char **argv) { + return 0; // Set a breakpoint here +} diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index f65034c1dbea3..ff5234ac64451 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -151,22 +151,31 @@ TEST(DILLexerTests, IdentifiersTest) { Token token = lexer.GetCurrentToken(); EXPECT_TRUE(token.IsNot(Token::identifier)); EXPECT_TRUE(token.IsOneOf({Token::eof, Token::coloncolon, Token::l_paren, - Token::r_paren, Token::numeric_constant})); + Token::r_paren, Token::integer_constant})); } } TEST(DILLexerTests, NumbersTest) { // These strings should lex into number tokens. 
- std::vector valid_numbers = {"123", "0x123", "0123", "0b101"}; + std::vector valid_integers = {"123", "0x123", "0123", "0b101"}; + std::vector valid_floats = { + "1.2", ".2", "2.f", "0x1.2", "0x.2", ".2e1f", + "2.e+1f", "0x1.f", "0x1.2p1", "0x1.p-1f", "0x1.2p+3f"}; // The lexer can lex these strings, but they should not be numbers. - std::vector invalid_numbers = {"", "x123", "b123"}; + std::vector invalid_numbers = {"", "x123", "b123", "a.b"}; - for (auto &str : valid_numbers) { + for (auto &str : valid_integers) { SCOPED_TRACE(str); EXPECT_THAT_EXPECTED(ExtractTokenData(str), llvm::HasValue(testing::ElementsAre( - testing::Pair(Token::numeric_constant, str)))); + testing::Pair(Token::integer_constant, str)))); + } + for (auto &str : valid_floats) { + SCOPED_TRACE(str); + EXPECT_THAT_EXPECTED(ExtractTokenData(str), + llvm::HasValue(testing::ElementsAre( + testing::Pair(Token::floating_constant, str)))); } // Verify that none of the invalid numbers come out as numeric tokens. for (auto &str : invalid_numbers) { @@ -175,7 +184,7 @@ TEST(DILLexerTests, NumbersTest) { EXPECT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); DILLexer lexer(*maybe_lexer); Token token = lexer.GetCurrentToken(); - EXPECT_TRUE(token.IsNot(Token::numeric_constant)); + EXPECT_TRUE(token.IsNot(Token::integer_constant)); EXPECT_TRUE(token.IsOneOf({Token::eof, Token::identifier})); } } From 132077733f69226e3a5f3b2cc183605b4e18f63c Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Tue, 5 Aug 2025 21:30:15 +0500 Subject: [PATCH 03/17] Automatically pick integer literal type --- lldb/include/lldb/ValueObject/DILAST.h | 24 ++++- lldb/include/lldb/ValueObject/DILEval.h | 5 + lldb/source/ValueObject/DILEval.cpp | 98 +++++++++++++++++-- lldb/source/ValueObject/DILParser.cpp | 43 ++++---- .../Arithmetic/TestFrameVarDILArithmetic.py | 19 +++- 5 files changed, 148 insertions(+), 41 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h index 
a174e28ea5c06..a0e5909a8c6a7 100644 --- a/lldb/include/lldb/ValueObject/DILAST.h +++ b/lldb/include/lldb/ValueObject/DILAST.h @@ -181,22 +181,36 @@ class BitFieldExtractionNode : public ASTNode { class ScalarLiteralNode : public ASTNode { public: - ScalarLiteralNode(uint32_t location, lldb::BasicType type, Scalar value) - : ASTNode(location, NodeKind::eScalarLiteralNode), m_type(type), - m_value(value) {} + ScalarLiteralNode(uint32_t location, Scalar value, uint32_t radix, + bool is_unsigned, bool is_long, bool is_longlong) + : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value), + m_radix(radix), m_is_unsigned(is_unsigned), m_is_long(is_long), + m_is_longlong(is_longlong) {} + + ScalarLiteralNode(uint32_t location, Scalar value, bool is_float) + : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value), + m_is_float(is_float) {} llvm::Expected Accept(Visitor *v) const override; - lldb::BasicType GetType() const { return m_type; } Scalar GetValue() const & { return m_value; } + uint32_t GetRadix() const { return m_radix; } + bool IsUnsigned() const { return m_is_unsigned; } + bool IsLong() const { return m_is_long; } + bool IsLongLong() const { return m_is_longlong; } + bool IsFloat() const { return m_is_float; } static bool classof(const ASTNode *node) { return node->GetKind() == NodeKind::eScalarLiteralNode; } private: - lldb::BasicType m_type; Scalar m_value; + uint32_t m_radix; + bool m_is_unsigned; + bool m_is_long; + bool m_is_longlong; + bool m_is_float; }; /// This class contains one Visit method for each specialized type of diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h index cb2a81d1c7ba1..22a6c5bd0af9a 100644 --- a/lldb/include/lldb/ValueObject/DILEval.h +++ b/lldb/include/lldb/ValueObject/DILEval.h @@ -57,6 +57,11 @@ class Interpreter : Visitor { llvm::Expected Visit(const ScalarLiteralNode *node) override; + llvm::Expected + PickLiteralType(lldb::TypeSystemSP type_system, + 
std::shared_ptr ctx, + const ScalarLiteralNode *literal); + // Used by the interpreter to create objects, perform casts, etc. lldb::TargetSP m_target; llvm::StringRef m_expr; diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index ff2fc949e3d10..ff994a3e280a4 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -499,8 +499,7 @@ Interpreter::Visit(const BitFieldExtractionNode *node) { return child_valobj_sp; } -static CompilerType GetBasicTypeFromCU(std::shared_ptr ctx, - lldb::BasicType basic_type) { +static lldb::TypeSystemSP GetTypeSystemFromCU(std::shared_ptr ctx) { SymbolContext symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextCompUnit); auto language = symbol_context.comp_unit->GetLanguage(); @@ -509,20 +508,103 @@ static CompilerType GetBasicTypeFromCU(std::shared_ptr ctx, auto type_system = symbol_context.module_sp->GetTypeSystemForLanguage(language); + if (type_system) + return *type_system; + + return lldb::TypeSystemSP(); +} + +static CompilerType GetBasicType(lldb::TypeSystemSP type_system, + lldb::BasicType basic_type) { if (type_system) if (auto compiler_type = type_system.get()->GetBasicTypeFromAST(basic_type)) return compiler_type; - return CompilerType(); + CompilerType empty_type; + return empty_type; +} + +llvm::Expected +Interpreter::PickLiteralType(lldb::TypeSystemSP type_system, + std::shared_ptr ctx, + const ScalarLiteralNode *literal) { + Scalar scalar = literal->GetValue(); + if (scalar.GetType() == Scalar::e_float) { + if (literal->IsFloat()) + return GetBasicType(type_system, lldb::eBasicTypeFloat); + return GetBasicType(type_system, lldb::eBasicTypeDouble); + } else if (scalar.GetType() == Scalar::e_int) { + // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be + // an unsigned integer. + bool unsigned_is_allowed = + literal->IsUnsigned() || literal->GetRadix() != 10; + + // Try int/unsigned int. 
+ uint64_t int_byte_size = 0; + if (auto temp = GetBasicType(type_system, lldb::eBasicTypeInt) + .GetByteSize(ctx.get())) + int_byte_size = *temp; + unsigned int_size = int_byte_size * CHAR_BIT; + llvm::APInt apint = scalar.GetAPSInt(); + if (!literal->IsLong() && !literal->IsLongLong() && + apint.isIntN(int_size)) { + if (!literal->IsUnsigned() && apint.isIntN(int_size - 1)) + return GetBasicType(type_system, lldb::eBasicTypeInt); + if (unsigned_is_allowed) + return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt); + } + // Try long/unsigned long. + uint64_t long_byte_size = 0; + if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLong) + .GetByteSize(ctx.get())) + long_byte_size = *temp; + unsigned long_size = long_byte_size * CHAR_BIT; + if (!literal->IsLongLong() && apint.isIntN(long_size)) { + if (!literal->IsUnsigned() && apint.isIntN(long_size - 1)) + return GetBasicType(type_system, lldb::eBasicTypeLong); + if (unsigned_is_allowed) + return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong); + } + // Try long long/unsigned long long. + uint64_t long_long_byte_size = 0; + if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLongLong) + .GetByteSize(ctx.get())) + long_long_byte_size = *temp; + unsigned long_long_size = long_long_byte_size * CHAR_BIT; + if (apint.isIntN(long_long_size)) { + if (!literal->IsUnsigned() && apint.isIntN(long_long_size - 1)) + return GetBasicType(type_system, lldb::eBasicTypeLongLong); + // If we still couldn't decide a type, we probably have something that + // does not fit in a signed long long, but has no U suffix. 
Also known as: + // + // warning: integer literal is too large to be represented in a signed + // integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal] + // + return GetBasicType(type_system, lldb::eBasicTypeUnsignedLongLong); + } + return llvm::make_error( + m_expr, + "integer literal is too large to be represented in any integer type", + literal->GetLocation()); + } + return llvm::make_error( + m_expr, "unable to create a const literal", literal->GetLocation()); } llvm::Expected Interpreter::Visit(const ScalarLiteralNode *node) { - CompilerType result_type = - GetBasicTypeFromCU(m_exe_ctx_scope, node->GetType()); - Scalar value = node->GetValue(); - return ValueObject::CreateValueObjectFromScalar(m_target, value, result_type, - "result"); + auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope); + if (type_system) { + auto type = PickLiteralType(type_system, m_exe_ctx_scope, node); + if (type) { + Scalar scalar = node->GetValue(); + return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type, + "result"); + } else + return type.takeError(); + } + return llvm::make_error( + m_expr, "unable to create a const literal", node->GetLocation()); } } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index eee4a7bc4d363..35eb6d62b7ba4 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -396,30 +396,26 @@ ASTNodeUP DILParser::ParseNumericLiteral() { return numeric_constant; } -static constexpr std::pair type_suffixes[] = { - {"ull", lldb::eBasicTypeUnsignedLongLong}, - {"ul", lldb::eBasicTypeUnsignedLong}, - {"u", lldb::eBasicTypeUnsignedInt}, - {"ll", lldb::eBasicTypeLongLong}, - {"l", lldb::eBasicTypeLong}, -}; - ASTNodeUP DILParser::ParseIntegerLiteral() { Token token = CurToken(); auto spelling = token.GetSpelling(); llvm::StringRef spelling_ref = spelling; - lldb::BasicType type = lldb::eBasicTypeInt; - for (auto [suffix, 
t] : type_suffixes) { - if (spelling_ref.consume_back_insensitive(suffix)) { - type = t; - break; - } - } + + auto radix = llvm::getAutoSenseRadix(spelling_ref); + bool is_unsigned = false, is_long = false, is_longlong = false; + if (spelling_ref.consume_back_insensitive("ll")) + is_longlong = true; + if (spelling_ref.consume_back_insensitive("l")) + is_long = true; + if (spelling_ref.consume_back_insensitive("u")) + is_unsigned = true; + llvm::APInt raw_value; - if (!spelling_ref.getAsInteger(0, raw_value)) { + if (!spelling_ref.getAsInteger(radix, raw_value)) { Scalar scalar_value(raw_value); - return std::make_unique(token.GetLocation(), type, - scalar_value); + return std::make_unique(token.GetLocation(), + scalar_value, radix, is_unsigned, + is_long, is_longlong); } return std::make_unique(); } @@ -428,19 +424,20 @@ ASTNodeUP DILParser::ParseFloatingPointLiteral() { Token token = CurToken(); auto spelling = token.GetSpelling(); llvm::StringRef spelling_ref = spelling; - spelling_ref = spelling; - lldb::BasicType type = lldb::eBasicTypeDouble; + + bool is_float = false; llvm::APFloat raw_float(llvm::APFloat::IEEEdouble()); if (spelling_ref.consume_back_insensitive("f")) { - type = lldb::eBasicTypeFloat; + is_float = true; raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle()); } + auto StatusOrErr = raw_float.convertFromString( spelling_ref, llvm::APFloat::rmNearestTiesToEven); if (!errorToBool(StatusOrErr.takeError())) { Scalar scalar_value(raw_float); - return std::make_unique(token.GetLocation(), type, - scalar_value); + return std::make_unique(token.GetLocation(), + scalar_value, is_float); } return std::make_unique(); } diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py index 883781873a525..57a636ebb0829 100644 --- a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py +++ 
b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py @@ -20,11 +20,20 @@ def test_arithmetic(self): self.runCmd("settings set target.experimental.use-DIL true") # Check number parsing - self.expect_var_path("1", value="1", type="int") - self.expect_var_path("1ull", value="1", type="unsigned long long") - self.expect_var_path("0b10", value="2", type="int") - self.expect_var_path("010", value="8", type="int") - self.expect_var_path("0x10", value="16", type="int") self.expect_var_path("1.0", value="1", type="double") self.expect_var_path("1.0f", value="1", type="float") self.expect_var_path("0x1.2p+3f", value="9", type="float") + self.expect_var_path("1", value="1", type="int") + self.expect_var_path("1u", value="1", type="unsigned int") + self.expect_var_path("0b1l", value="1", type="long") + self.expect_var_path("01ul", value="1", type="unsigned long") + self.expect_var_path("0o1ll", value="1", type="long long") + self.expect_var_path("0x1ULL", value="1", type="unsigned long long") + self.expect_var_path("0xFFFFFFFFFFFFFFFF", value="18446744073709551615") + self.expect( + "frame var '0xFFFFFFFFFFFFFFFFF'", + error=True, + substrs=[ + "integer literal is too large to be represented in any integer type" + ], + ) From 902f72ac657fd823d9bdb0c11050349bd3908e86 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Thu, 7 Aug 2025 22:51:58 +0500 Subject: [PATCH 04/17] Adjust APInt bitwidth --- lldb/source/ValueObject/DILEval.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index ff994a3e280a4..f662da9f15615 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -598,6 +598,16 @@ Interpreter::Visit(const ScalarLiteralNode *node) { auto type = PickLiteralType(type_system, m_exe_ctx_scope, node); if (type) { Scalar scalar = node->GetValue(); + // APInt from StringRef::getAsInteger comes with just enough bitwidth to + // 
hold the value. This adjusts APInt bitwidth to match the compiler type. + if (scalar.GetType() == scalar.e_int) { + auto apsint = scalar.GetAPSInt(); + auto type_bitsize = type->GetBitSize(m_exe_ctx_scope.get()); + if (type_bitsize) { + llvm::APInt adjusted = apsint.zextOrTrunc(*type_bitsize); + scalar = Scalar(adjusted); + } + } return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type, "result"); } else From bfed95bc3428d7a2257dbd02fe73854b64fe0fe9 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Fri, 8 Aug 2025 17:56:09 +0500 Subject: [PATCH 05/17] Adjust code formatting --- lldb/include/lldb/ValueObject/DILAST.h | 2 +- lldb/source/ValueObject/DILEval.cpp | 46 ++++++++++++-------------- lldb/source/ValueObject/DILLexer.cpp | 4 +-- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h index a0e5909a8c6a7..7ceae6be94e4c 100644 --- a/lldb/include/lldb/ValueObject/DILAST.h +++ b/lldb/include/lldb/ValueObject/DILAST.h @@ -193,7 +193,7 @@ class ScalarLiteralNode : public ASTNode { llvm::Expected Accept(Visitor *v) const override; - Scalar GetValue() const & { return m_value; } + Scalar GetValue() const { return m_value; } uint32_t GetRadix() const { return m_radix; } bool IsUnsigned() const { return m_is_unsigned; } bool IsLong() const { return m_is_long; } diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index f662da9f15615..bcdd5074f1be5 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -502,10 +502,10 @@ Interpreter::Visit(const BitFieldExtractionNode *node) { static lldb::TypeSystemSP GetTypeSystemFromCU(std::shared_ptr ctx) { SymbolContext symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextCompUnit); - auto language = symbol_context.comp_unit->GetLanguage(); + lldb::LanguageType language = symbol_context.comp_unit->GetLanguage(); symbol_context = 
ctx->GetSymbolContext(lldb::eSymbolContextModule); - auto type_system = + llvm::Expected type_system = symbol_context.module_sp->GetTypeSystemForLanguage(language); if (type_system) @@ -533,7 +533,8 @@ Interpreter::PickLiteralType(lldb::TypeSystemSP type_system, if (literal->IsFloat()) return GetBasicType(type_system, lldb::eBasicTypeFloat); return GetBasicType(type_system, lldb::eBasicTypeDouble); - } else if (scalar.GetType() == Scalar::e_int) { + } + if (scalar.GetType() == Scalar::e_int) { // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be // an unsigned integer. bool unsigned_is_allowed = @@ -594,27 +595,24 @@ Interpreter::PickLiteralType(lldb::TypeSystemSP type_system, llvm::Expected Interpreter::Visit(const ScalarLiteralNode *node) { auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope); - if (type_system) { - auto type = PickLiteralType(type_system, m_exe_ctx_scope, node); - if (type) { - Scalar scalar = node->GetValue(); - // APInt from StringRef::getAsInteger comes with just enough bitwidth to - // hold the value. This adjusts APInt bitwidth to match the compiler type. - if (scalar.GetType() == scalar.e_int) { - auto apsint = scalar.GetAPSInt(); - auto type_bitsize = type->GetBitSize(m_exe_ctx_scope.get()); - if (type_bitsize) { - llvm::APInt adjusted = apsint.zextOrTrunc(*type_bitsize); - scalar = Scalar(adjusted); - } - } - return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type, - "result"); - } else - return type.takeError(); - } - return llvm::make_error( - m_expr, "unable to create a const literal", node->GetLocation()); + if (!type_system) + return llvm::make_error( + m_expr, "unable to create a const literal", node->GetLocation()); + + auto type = PickLiteralType(type_system, m_exe_ctx_scope, node); + if (type) { + Scalar scalar = node->GetValue(); + // APInt from StringRef::getAsInteger comes with just enough bitwidth to + // hold the value. This adjusts APInt bitwidth to match the compiler type. 
+ if (scalar.GetType() == scalar.e_int) { + auto type_bitsize = type->GetBitSize(m_exe_ctx_scope.get()); + if (type_bitsize) + scalar.TruncOrExtendTo(*type_bitsize, false); + } + return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type, + "result"); + } else + return type.takeError(); } } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index 00f9a0c515461..4cf33b7a0e868 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -82,7 +82,7 @@ static std::optional IsNumber(llvm::StringRef &remainder, if (next_pos == remainder.end() || !IsDigit(*next_pos)) return std::nullopt; } - if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') { + if (IsDigit(*cur_pos) || *cur_pos == '.') { while (IsNumberBodyChar(*cur_pos)) cur_pos++; @@ -96,7 +96,7 @@ static std::optional IsNumber(llvm::StringRef &remainder, char prev_ch = *(cur_pos - 1); if (prev_ch == 'e' || prev_ch == 'E' || prev_ch == 'p' || prev_ch == 'P') { - if (*(cur_pos) == '+' || *(cur_pos) == '-') { + if (*cur_pos == '+' || *cur_pos == '-') { cur_pos++; while (IsNumberBodyChar(*cur_pos)) cur_pos++; From 6989af6f894fab7f525a8f234125772321258388 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Fri, 8 Aug 2025 18:05:44 +0500 Subject: [PATCH 06/17] Fix suffix parsing --- lldb/source/ValueObject/DILParser.cpp | 4 +++- .../var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index 35eb6d62b7ba4..b576407b38e2a 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -403,11 +403,13 @@ ASTNodeUP DILParser::ParseIntegerLiteral() { auto radix = llvm::getAutoSenseRadix(spelling_ref); bool is_unsigned = false, is_long = false, is_longlong = false; + if (spelling_ref.consume_back_insensitive("u")) + is_unsigned = true; 
if (spelling_ref.consume_back_insensitive("ll")) is_longlong = true; if (spelling_ref.consume_back_insensitive("l")) is_long = true; - if (spelling_ref.consume_back_insensitive("u")) + if (!is_unsigned && spelling_ref.consume_back_insensitive("u")) is_unsigned = true; llvm::APInt raw_value; diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py index 57a636ebb0829..9e1889066225d 100644 --- a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py +++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py @@ -27,11 +27,18 @@ def test_arithmetic(self): self.expect_var_path("1u", value="1", type="unsigned int") self.expect_var_path("0b1l", value="1", type="long") self.expect_var_path("01ul", value="1", type="unsigned long") + self.expect_var_path("01lu", value="1", type="unsigned long") self.expect_var_path("0o1ll", value="1", type="long long") self.expect_var_path("0x1ULL", value="1", type="unsigned long long") + self.expect_var_path("0x1llu", value="1", type="unsigned long long") + self.expect( + "frame var '1ullu'", + error=True, + substrs=["Failed to parse token as numeric-constant"], + ) self.expect_var_path("0xFFFFFFFFFFFFFFFF", value="18446744073709551615") self.expect( - "frame var '0xFFFFFFFFFFFFFFFFF'", + "frame var '0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'", error=True, substrs=[ "integer literal is too large to be represented in any integer type" From 967549ea8bedad5b5ed181952d7e9d97e3a6a4ce Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Fri, 8 Aug 2025 19:11:31 +0500 Subject: [PATCH 07/17] Add remainder.end() guard --- lldb/source/ValueObject/DILLexer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index 4cf33b7a0e868..b79501cbf520b 100644 --- a/lldb/source/ValueObject/DILLexer.cpp 
+++ b/lldb/source/ValueObject/DILLexer.cpp @@ -103,7 +103,8 @@ static std::optional IsNumber(llvm::StringRef &remainder, } } } - + if (cur_pos > remainder.end()) + cur_pos = remainder.end(); llvm::StringRef number = remainder.substr(0, cur_pos - remainder.begin()); if (remainder.consume_front(number)) return number; From 5eb7dc2a3c9b67320e06bab90bd8ac053dfd5956 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Tue, 12 Aug 2025 23:22:31 +0500 Subject: [PATCH 08/17] Redo IsNumber without iterators --- lldb/include/lldb/ValueObject/DILLexer.h | 1 + lldb/source/ValueObject/DILLexer.cpp | 64 +++++++++----------- lldb/unittests/ValueObject/DILLexerTests.cpp | 20 ++++++ 3 files changed, 50 insertions(+), 35 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h index a9f01785c6c20..cbffb6375778c 100644 --- a/lldb/include/lldb/ValueObject/DILLexer.h +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -35,6 +35,7 @@ class Token { l_square, minus, period, + plus, r_paren, r_square, star, diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index b79501cbf520b..fd6be1ec89063 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -42,6 +42,9 @@ llvm::StringRef Token::GetTokenName(Kind kind) { return "minus"; case Kind::period: return "period"; + return "l_square"; + case Kind::plus: + return "plus"; case Kind::r_paren: return "r_paren"; case Kind::r_square: @@ -72,42 +75,33 @@ static std::optional IsWord(llvm::StringRef expr, return candidate; } -static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); } +static bool IsNumberBodyChar(char ch) { + return IsDigit(ch) || IsLetter(ch) || ch == '.'; +} static std::optional IsNumber(llvm::StringRef &remainder, bool &isFloat) { - llvm::StringRef::iterator cur_pos = remainder.begin(); - if (*cur_pos == '.') { - auto next_pos = cur_pos + 1; - if (next_pos == remainder.end() || 
!IsDigit(*next_pos)) - return std::nullopt; - } - if (IsDigit(*cur_pos) || *cur_pos == '.') { - while (IsNumberBodyChar(*cur_pos)) - cur_pos++; - - if (*cur_pos == '.') { - isFloat = true; - cur_pos++; - while (IsNumberBodyChar(*cur_pos)) - cur_pos++; - - // Check if there's an exponent - char prev_ch = *(cur_pos - 1); - if (prev_ch == 'e' || prev_ch == 'E' || prev_ch == 'p' || - prev_ch == 'P') { - if (*cur_pos == '+' || *cur_pos == '-') { - cur_pos++; - while (IsNumberBodyChar(*cur_pos)) - cur_pos++; - } + llvm::StringRef tail = remainder; + llvm::StringRef body = tail.take_while(IsNumberBodyChar); + if (body.empty()) + return std::nullopt; + size_t dots = body.count('.'); + if (dots > 1 || dots == body.size()) + return std::nullopt; + if (IsDigit(body.front()) || (body[0] == '.' && IsDigit(body[1]))) { + isFloat = dots == 1; + char last = body.back(); + tail = tail.drop_front(body.size()); + if (last == 'e' || last == 'E' || last == 'p' || last == 'P') { + if (!tail.empty() && (tail.front() == '+' || tail.front() == '-')) { + tail = tail.drop_front(); + tail = tail.drop_while(IsNumberBodyChar); } } - if (cur_pos > remainder.end()) - cur_pos = remainder.end(); - llvm::StringRef number = remainder.substr(0, cur_pos - remainder.begin()); - if (remainder.consume_front(number)) - return number; + size_t number_length = remainder.size() - tail.size(); + llvm::StringRef number = remainder.take_front(number_length); + remainder = remainder.drop_front(number_length); + return number; } return std::nullopt; } @@ -147,10 +141,10 @@ llvm::Expected DILLexer::Lex(llvm::StringRef expr, return Token(Token::identifier, maybe_word->str(), position); constexpr std::pair operators[] = { - {Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"}, - {Token::l_paren, "("}, {Token::l_square, "["}, {Token::minus, "-"}, - {Token::period, "."}, {Token::r_paren, ")"}, {Token::r_square, "]"}, - {Token::star, "*"}, + {Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, 
"::"}, + {Token::l_paren, "("}, {Token::l_square, "["}, {Token::minus, "-"}, + {Token::period, "."}, {Token::plus, "+"}, {Token::r_paren, ")"}, + {Token::r_square, "]"}, {Token::star, "*"}, }; for (auto [kind, str] : operators) { if (remainder.consume_front(str)) diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index ff5234ac64451..02b6062a604ce 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -187,4 +187,24 @@ TEST(DILLexerTests, NumbersTest) { EXPECT_TRUE(token.IsNot(Token::integer_constant)); EXPECT_TRUE(token.IsOneOf({Token::eof, Token::identifier})); } + + // Verify that '-' and '+' are not lexed if they're not part of a number + std::vector expressions = {"1+e", "0x1+p", "1.1+e", + "1.1e1+e", "0x1.1p-1-p", "1e-1+e", + "1e1+e", "0x1p-1-p"}; + for (auto &str : expressions) { + SCOPED_TRACE(str); + llvm::Expected maybe_lexer = DILLexer::Create(str); + EXPECT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); + DILLexer lexer(*maybe_lexer); + Token token = lexer.GetCurrentToken(); + EXPECT_TRUE( + token.IsOneOf({Token::integer_constant, Token::floating_constant})); + lexer.Advance(); + token = lexer.GetCurrentToken(); + EXPECT_TRUE(token.IsOneOf({Token::plus, Token::minus})); + lexer.Advance(); + token = lexer.GetCurrentToken(); + EXPECT_TRUE(token.Is(Token::identifier)); + } } From 45498a4cbf5bfc493c4c74eaa091945c42ec5d78 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Wed, 13 Aug 2025 00:09:54 +0500 Subject: [PATCH 09/17] Split ScalarLiteralNode --- lldb/include/lldb/ValueObject/DILAST.h | 38 +++++-- lldb/include/lldb/ValueObject/DILEval.h | 8 +- lldb/source/ValueObject/DILAST.cpp | 6 +- lldb/source/ValueObject/DILEval.cpp | 138 ++++++++++++------------ lldb/source/ValueObject/DILParser.cpp | 12 +-- 5 files changed, 113 insertions(+), 89 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h index 
7ceae6be94e4c..7d9680c27c711 100644 --- a/lldb/include/lldb/ValueObject/DILAST.h +++ b/lldb/include/lldb/ValueObject/DILAST.h @@ -179,37 +179,51 @@ class BitFieldExtractionNode : public ASTNode { int64_t m_last_index; }; -class ScalarLiteralNode : public ASTNode { +class IntegerLiteralNode : public ASTNode { public: - ScalarLiteralNode(uint32_t location, Scalar value, uint32_t radix, - bool is_unsigned, bool is_long, bool is_longlong) + IntegerLiteralNode(uint32_t location, llvm::APInt value, uint32_t radix, + bool is_unsigned, bool is_long, bool is_longlong) : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value), m_radix(radix), m_is_unsigned(is_unsigned), m_is_long(is_long), m_is_longlong(is_longlong) {} - ScalarLiteralNode(uint32_t location, Scalar value, bool is_float) - : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value), - m_is_float(is_float) {} - llvm::Expected Accept(Visitor *v) const override; - Scalar GetValue() const { return m_value; } + llvm::APInt GetValue() const { return m_value; } uint32_t GetRadix() const { return m_radix; } bool IsUnsigned() const { return m_is_unsigned; } bool IsLong() const { return m_is_long; } bool IsLongLong() const { return m_is_longlong; } - bool IsFloat() const { return m_is_float; } static bool classof(const ASTNode *node) { return node->GetKind() == NodeKind::eScalarLiteralNode; } private: - Scalar m_value; + llvm::APInt m_value; uint32_t m_radix; bool m_is_unsigned; bool m_is_long; bool m_is_longlong; +}; + +class FloatLiteralNode : public ASTNode { +public: + FloatLiteralNode(uint32_t location, llvm::APFloat value, bool is_float) + : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value), + m_is_float(is_float) {} + + llvm::Expected Accept(Visitor *v) const override; + + llvm::APFloat GetValue() const { return m_value; } + bool IsFloat() const { return m_is_float; } + + static bool classof(const ASTNode *node) { + return node->GetKind() == NodeKind::eScalarLiteralNode; + } + 
+private: + llvm::APFloat m_value; bool m_is_float; }; @@ -231,7 +245,9 @@ class Visitor { virtual llvm::Expected Visit(const BitFieldExtractionNode *node) = 0; virtual llvm::Expected - Visit(const ScalarLiteralNode *node) = 0; + Visit(const IntegerLiteralNode *node) = 0; + virtual llvm::Expected + Visit(const FloatLiteralNode *node) = 0; }; } // namespace lldb_private::dil diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h index 22a6c5bd0af9a..5a48c2c989f4d 100644 --- a/lldb/include/lldb/ValueObject/DILEval.h +++ b/lldb/include/lldb/ValueObject/DILEval.h @@ -55,12 +55,14 @@ class Interpreter : Visitor { llvm::Expected Visit(const BitFieldExtractionNode *node) override; llvm::Expected - Visit(const ScalarLiteralNode *node) override; + Visit(const IntegerLiteralNode *node) override; + llvm::Expected + Visit(const FloatLiteralNode *node) override; llvm::Expected - PickLiteralType(lldb::TypeSystemSP type_system, + PickIntegerType(lldb::TypeSystemSP type_system, std::shared_ptr ctx, - const ScalarLiteralNode *literal); + const IntegerLiteralNode *literal); // Used by the interpreter to create objects, perform casts, etc. 
lldb::TargetSP m_target; diff --git a/lldb/source/ValueObject/DILAST.cpp b/lldb/source/ValueObject/DILAST.cpp index 38215ae18f6ce..70564663a62cd 100644 --- a/lldb/source/ValueObject/DILAST.cpp +++ b/lldb/source/ValueObject/DILAST.cpp @@ -38,7 +38,11 @@ BitFieldExtractionNode::Accept(Visitor *v) const { } llvm::Expected -ScalarLiteralNode::Accept(Visitor *v) const { +IntegerLiteralNode::Accept(Visitor *v) const { + return v->Visit(this); +} + +llvm::Expected FloatLiteralNode::Accept(Visitor *v) const { return v->Visit(this); } diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index bcdd5074f1be5..fd6c9388337b9 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -525,94 +525,96 @@ static CompilerType GetBasicType(lldb::TypeSystemSP type_system, } llvm::Expected -Interpreter::PickLiteralType(lldb::TypeSystemSP type_system, +Interpreter::PickIntegerType(lldb::TypeSystemSP type_system, std::shared_ptr ctx, - const ScalarLiteralNode *literal) { - Scalar scalar = literal->GetValue(); - if (scalar.GetType() == Scalar::e_float) { - if (literal->IsFloat()) - return GetBasicType(type_system, lldb::eBasicTypeFloat); - return GetBasicType(type_system, lldb::eBasicTypeDouble); + const IntegerLiteralNode *literal) { + // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be + // an unsigned integer. + bool unsigned_is_allowed = literal->IsUnsigned() || literal->GetRadix() != 10; + + // Try int/unsigned int. 
+ uint64_t int_byte_size = 0; + if (auto temp = + GetBasicType(type_system, lldb::eBasicTypeInt).GetByteSize(ctx.get())) + int_byte_size = *temp; + unsigned int_size = int_byte_size * CHAR_BIT; + llvm::APInt apint = literal->GetValue(); + if (!literal->IsLong() && !literal->IsLongLong() && apint.isIntN(int_size)) { + if (!literal->IsUnsigned() && apint.isIntN(int_size - 1)) + return GetBasicType(type_system, lldb::eBasicTypeInt); + if (unsigned_is_allowed) + return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt); } - if (scalar.GetType() == Scalar::e_int) { - // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be - // an unsigned integer. - bool unsigned_is_allowed = - literal->IsUnsigned() || literal->GetRadix() != 10; - - // Try int/unsigned int. - uint64_t int_byte_size = 0; - if (auto temp = GetBasicType(type_system, lldb::eBasicTypeInt) - .GetByteSize(ctx.get())) - int_byte_size = *temp; - unsigned int_size = int_byte_size * CHAR_BIT; - llvm::APInt apint = scalar.GetAPSInt(); - if (!literal->IsLong() && !literal->IsLongLong() && - apint.isIntN(int_size)) { - if (!literal->IsUnsigned() && apint.isIntN(int_size - 1)) - return GetBasicType(type_system, lldb::eBasicTypeInt); - if (unsigned_is_allowed) - return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt); - } - // Try long/unsigned long. - uint64_t long_byte_size = 0; - if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLong) - .GetByteSize(ctx.get())) - long_byte_size = *temp; - unsigned long_size = long_byte_size * CHAR_BIT; - if (!literal->IsLongLong() && apint.isIntN(long_size)) { - if (!literal->IsUnsigned() && apint.isIntN(long_size - 1)) - return GetBasicType(type_system, lldb::eBasicTypeLong); - if (unsigned_is_allowed) - return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong); - } - // Try long long/unsigned long long. 
- uint64_t long_long_byte_size = 0; - if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLongLong) - .GetByteSize(ctx.get())) - long_long_byte_size = *temp; - unsigned long_long_size = long_long_byte_size * CHAR_BIT; - if (apint.isIntN(long_long_size)) { - if (!literal->IsUnsigned() && apint.isIntN(long_long_size - 1)) - return GetBasicType(type_system, lldb::eBasicTypeLongLong); - // If we still couldn't decide a type, we probably have something that - // does not fit in a signed long long, but has no U suffix. Also known as: - // - // warning: integer literal is too large to be represented in a signed - // integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal] - // - return GetBasicType(type_system, lldb::eBasicTypeUnsignedLongLong); - } - return llvm::make_error( - m_expr, - "integer literal is too large to be represented in any integer type", - literal->GetLocation()); + // Try long/unsigned long. + uint64_t long_byte_size = 0; + if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLong) + .GetByteSize(ctx.get())) + long_byte_size = *temp; + unsigned long_size = long_byte_size * CHAR_BIT; + if (!literal->IsLongLong() && apint.isIntN(long_size)) { + if (!literal->IsUnsigned() && apint.isIntN(long_size - 1)) + return GetBasicType(type_system, lldb::eBasicTypeLong); + if (unsigned_is_allowed) + return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong); + } + // Try long long/unsigned long long. + uint64_t long_long_byte_size = 0; + if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLongLong) + .GetByteSize(ctx.get())) + long_long_byte_size = *temp; + unsigned long_long_size = long_long_byte_size * CHAR_BIT; + if (apint.isIntN(long_long_size)) { + if (!literal->IsUnsigned() && apint.isIntN(long_long_size - 1)) + return GetBasicType(type_system, lldb::eBasicTypeLongLong); + // If we still couldn't decide a type, we probably have something that + // does not fit in a signed long long, but has no U suffix. 
Also known as: + // + // warning: integer literal is too large to be represented in a signed + // integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal] + // + return GetBasicType(type_system, lldb::eBasicTypeUnsignedLongLong); } return llvm::make_error( - m_expr, "unable to create a const literal", literal->GetLocation()); + m_expr, + "integer literal is too large to be represented in any integer type", + literal->GetLocation()); } llvm::Expected -Interpreter::Visit(const ScalarLiteralNode *node) { +Interpreter::Visit(const IntegerLiteralNode *node) { auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope); if (!type_system) return llvm::make_error( m_expr, "unable to create a const literal", node->GetLocation()); - auto type = PickLiteralType(type_system, m_exe_ctx_scope, node); + auto type = PickIntegerType(type_system, m_exe_ctx_scope, node); if (type) { Scalar scalar = node->GetValue(); // APInt from StringRef::getAsInteger comes with just enough bitwidth to // hold the value. This adjusts APInt bitwidth to match the compiler type. - if (scalar.GetType() == scalar.e_int) { - auto type_bitsize = type->GetBitSize(m_exe_ctx_scope.get()); - if (type_bitsize) - scalar.TruncOrExtendTo(*type_bitsize, false); - } + auto type_bitsize = type->GetBitSize(m_exe_ctx_scope.get()); + if (type_bitsize) + scalar.TruncOrExtendTo(*type_bitsize, false); return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type, "result"); } else return type.takeError(); } +llvm::Expected +Interpreter::Visit(const FloatLiteralNode *node) { + auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope); + lldb::BasicType basic_type = + node->IsFloat() ? 
lldb::eBasicTypeFloat : lldb::eBasicTypeDouble; + CompilerType type = GetBasicType(type_system, basic_type); + + if (!type) + return llvm::make_error( + m_expr, "unable to create a const literal", node->GetLocation()); + + Scalar scalar = node->GetValue(); + return ValueObject::CreateValueObjectFromScalar(m_target, scalar, type, + "result"); +} + } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index b576407b38e2a..241bc84462c88 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -409,15 +409,16 @@ ASTNodeUP DILParser::ParseIntegerLiteral() { is_longlong = true; if (spelling_ref.consume_back_insensitive("l")) is_long = true; + // Suffix 'u' can be only specified only once, before or after 'l' if (!is_unsigned && spelling_ref.consume_back_insensitive("u")) is_unsigned = true; llvm::APInt raw_value; if (!spelling_ref.getAsInteger(radix, raw_value)) { Scalar scalar_value(raw_value); - return std::make_unique(token.GetLocation(), - scalar_value, radix, is_unsigned, - is_long, is_longlong); + return std::make_unique(token.GetLocation(), raw_value, + radix, is_unsigned, is_long, + is_longlong); } return std::make_unique(); } @@ -437,9 +438,8 @@ ASTNodeUP DILParser::ParseFloatingPointLiteral() { auto StatusOrErr = raw_float.convertFromString( spelling_ref, llvm::APFloat::rmNearestTiesToEven); if (!errorToBool(StatusOrErr.takeError())) { - Scalar scalar_value(raw_float); - return std::make_unique(token.GetLocation(), - scalar_value, is_float); + return std::make_unique(token.GetLocation(), raw_float, + is_float); } return std::make_unique(); } From 199dd80f068d944e97c54826033f83f5e8cead7f Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Wed, 13 Aug 2025 20:12:44 +0500 Subject: [PATCH 10/17] Add tests for PickIntegerType --- .../Arithmetic/TestFrameVarDILArithmetic.py | 46 ----------- .../expr/{Arithmetic => Literals}/Makefile | 0 
.../expr/Literals/TestFrameVarDILLiterals.py | 76 +++++++++++++++++++ .../expr/{Arithmetic => Literals}/main.cpp | 0 4 files changed, 76 insertions(+), 46 deletions(-) delete mode 100644 lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py rename lldb/test/API/commands/frame/var-dil/expr/{Arithmetic => Literals}/Makefile (100%) create mode 100644 lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py rename lldb/test/API/commands/frame/var-dil/expr/{Arithmetic => Literals}/main.cpp (100%) diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py deleted file mode 100644 index 9e1889066225d..0000000000000 --- a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Test DIL arithmetic. -""" - -import lldb -from lldbsuite.test.lldbtest import * -from lldbsuite.test.decorators import * -from lldbsuite.test import lldbutil - - -class TestFrameVarDILArithmetic(TestBase): - NO_DEBUG_INFO_TESTCASE = True - - def test_arithmetic(self): - self.build() - lldbutil.run_to_source_breakpoint( - self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") - ) - - self.runCmd("settings set target.experimental.use-DIL true") - - # Check number parsing - self.expect_var_path("1.0", value="1", type="double") - self.expect_var_path("1.0f", value="1", type="float") - self.expect_var_path("0x1.2p+3f", value="9", type="float") - self.expect_var_path("1", value="1", type="int") - self.expect_var_path("1u", value="1", type="unsigned int") - self.expect_var_path("0b1l", value="1", type="long") - self.expect_var_path("01ul", value="1", type="unsigned long") - self.expect_var_path("01lu", value="1", type="unsigned long") - self.expect_var_path("0o1ll", value="1", type="long long") - self.expect_var_path("0x1ULL", value="1", type="unsigned long long") - 
self.expect_var_path("0x1llu", value="1", type="unsigned long long") - self.expect( - "frame var '1ullu'", - error=True, - substrs=["Failed to parse token as numeric-constant"], - ) - self.expect_var_path("0xFFFFFFFFFFFFFFFF", value="18446744073709551615") - self.expect( - "frame var '0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'", - error=True, - substrs=[ - "integer literal is too large to be represented in any integer type" - ], - ) diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile b/lldb/test/API/commands/frame/var-dil/expr/Literals/Makefile similarity index 100% rename from lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile rename to lldb/test/API/commands/frame/var-dil/expr/Literals/Makefile diff --git a/lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py b/lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py new file mode 100644 index 0000000000000..7fbfb515035cd --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py @@ -0,0 +1,76 @@ +""" +Test DIL literals. 
+""" + +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test import lldbutil + + +class TestFrameVarDILLiterals(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test_literals(self): + self.build() + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") + ) + + self.runCmd("settings set target.experimental.use-DIL true") + + # Check number literals parsing + self.expect_var_path("1.0", value="1", type="double") + self.expect_var_path("1.0f", value="1", type="float") + self.expect_var_path("0x1.2p+3f", value="9", type="float") + self.expect_var_path("1", value="1", type="int") + self.expect_var_path("1u", value="1", type="unsigned int") + self.expect_var_path("0b1l", value="1", type="long") + self.expect_var_path("01ul", value="1", type="unsigned long") + self.expect_var_path("01lu", value="1", type="unsigned long") + self.expect_var_path("0o1ll", value="1", type="long long") + self.expect_var_path("0x1ULL", value="1", type="unsigned long long") + self.expect_var_path("0x1llu", value="1", type="unsigned long long") + self.expect( + "frame var '1ullu'", + error=True, + substrs=["Failed to parse token as numeric-constant"], + ) + + # Check integer literal type edge cases (dil::Interpreter::PickIntegerType) + frame = thread.GetFrameAtIndex(0) + v = frame.GetValueForVariablePath("argc") + # Creating an SBType from a BasicType still requires any value from the frame + int_size = v.GetType().GetBasicType(lldb.eBasicTypeInt).GetByteSize() + long_size = v.GetType().GetBasicType(lldb.eBasicTypeLong).GetByteSize() + longlong_size = v.GetType().GetBasicType(lldb.eBasicTypeLongLong).GetByteSize() + + longlong_str = "0x" + "F" * longlong_size * 2 + longlong_str = str(int(longlong_str, 16)) + self.assert_literal_type(frame, longlong_str, lldb.eBasicTypeUnsignedLongLong) + toolong_str = "0x" + "F" * longlong_size * 2 + "F" + self.expect( + 
f"frame var '{toolong_str}'", + error=True, + substrs=[ + "integer literal is too large to be represented in any integer type" + ], + ) + + # These check only apply if `int` and `long` have different sizes + if int_size < long_size: + # 0xFFFFFFFF and 4294967295 will have different types even though + # the numeric value is the same + hex_str = "0x" + "F" * int_size * 2 + dec_str = str(int(hex_str, 16)) + self.assert_literal_type(frame, hex_str, lldb.eBasicTypeUnsignedInt) + self.assert_literal_type(frame, dec_str, lldb.eBasicTypeLong) + long_str = "0x" + "F" * int_size * 2 + "F" + ulong_str = long_str + "u" + self.assert_literal_type(frame, long_str, lldb.eBasicTypeLong) + self.assert_literal_type(frame, ulong_str, lldb.eBasicTypeUnsignedLong) + + def assert_literal_type(self, frame, literal, expected_type): + value = frame.GetValueForVariablePath(literal) + basic_type = value.GetType().GetBasicType() + self.assertEqual(basic_type, expected_type) diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp b/lldb/test/API/commands/frame/var-dil/expr/Literals/main.cpp similarity index 100% rename from lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp rename to lldb/test/API/commands/frame/var-dil/expr/Literals/main.cpp From f5fffc8ee25f4cf2776303108778db6d5107dfbc Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Thu, 14 Aug 2025 20:38:53 +0500 Subject: [PATCH 11/17] Parser and Eval code cleanup --- lldb/include/lldb/ValueObject/DILAST.h | 35 ++++--- lldb/source/ValueObject/DILEval.cpp | 97 ++++++++++--------- lldb/source/ValueObject/DILParser.cpp | 33 +++---- .../expr/Literals/TestFrameVarDILLiterals.py | 5 + 4 files changed, 85 insertions(+), 85 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h index 7d9680c27c711..1d10755c46e39 100644 --- a/lldb/include/lldb/ValueObject/DILAST.h +++ b/lldb/include/lldb/ValueObject/DILAST.h @@ -21,9 +21,10 @@ enum class NodeKind { 
eArraySubscriptNode, eBitExtractionNode, eErrorNode, + eFloatLiteralNode, eIdentifierNode, + eIntegerLiteralNode, eMemberOfNode, - eScalarLiteralNode, eUnaryOpNode, }; @@ -179,52 +180,50 @@ class BitFieldExtractionNode : public ASTNode { int64_t m_last_index; }; +enum class IntegerTypeSuffix { None, Long, LongLong }; + class IntegerLiteralNode : public ASTNode { public: IntegerLiteralNode(uint32_t location, llvm::APInt value, uint32_t radix, - bool is_unsigned, bool is_long, bool is_longlong) - : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value), - m_radix(radix), m_is_unsigned(is_unsigned), m_is_long(is_long), - m_is_longlong(is_longlong) {} + bool is_unsigned, IntegerTypeSuffix type) + : ASTNode(location, NodeKind::eIntegerLiteralNode), + m_value(std::move(value)), m_radix(radix), m_is_unsigned(is_unsigned), + m_type(type) {} llvm::Expected Accept(Visitor *v) const override; - llvm::APInt GetValue() const { return m_value; } + const llvm::APInt &GetValue() const { return m_value; } uint32_t GetRadix() const { return m_radix; } bool IsUnsigned() const { return m_is_unsigned; } - bool IsLong() const { return m_is_long; } - bool IsLongLong() const { return m_is_longlong; } + IntegerTypeSuffix GetTypeSuffix() const { return m_type; } static bool classof(const ASTNode *node) { - return node->GetKind() == NodeKind::eScalarLiteralNode; + return node->GetKind() == NodeKind::eIntegerLiteralNode; } private: llvm::APInt m_value; uint32_t m_radix; bool m_is_unsigned; - bool m_is_long; - bool m_is_longlong; + IntegerTypeSuffix m_type; }; class FloatLiteralNode : public ASTNode { public: - FloatLiteralNode(uint32_t location, llvm::APFloat value, bool is_float) - : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value), - m_is_float(is_float) {} + FloatLiteralNode(uint32_t location, llvm::APFloat value) + : ASTNode(location, NodeKind::eFloatLiteralNode), + m_value(std::move(value)) {} llvm::Expected Accept(Visitor *v) const override; - llvm::APFloat 
GetValue() const { return m_value; } - bool IsFloat() const { return m_is_float; } + const llvm::APFloat &GetValue() const { return m_value; } static bool classof(const ASTNode *node) { - return node->GetKind() == NodeKind::eScalarLiteralNode; + return node->GetKind() == NodeKind::eFloatLiteralNode; } private: llvm::APFloat m_value; - bool m_is_float; }; /// This class contains one Visit method for each specialized type of diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index fd6c9388337b9..b8f0279c6398d 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -499,19 +499,14 @@ Interpreter::Visit(const BitFieldExtractionNode *node) { return child_valobj_sp; } -static lldb::TypeSystemSP GetTypeSystemFromCU(std::shared_ptr ctx) { +static llvm::Expected +GetTypeSystemFromCU(std::shared_ptr ctx) { SymbolContext symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextCompUnit); lldb::LanguageType language = symbol_context.comp_unit->GetLanguage(); symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextModule); - llvm::Expected type_system = - symbol_context.module_sp->GetTypeSystemForLanguage(language); - - if (type_system) - return *type_system; - - return lldb::TypeSystemSP(); + return symbol_context.module_sp->GetTypeSystemForLanguage(language); } static CompilerType GetBasicType(lldb::TypeSystemSP type_system, @@ -531,40 +526,39 @@ Interpreter::PickIntegerType(lldb::TypeSystemSP type_system, // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be // an unsigned integer. bool unsigned_is_allowed = literal->IsUnsigned() || literal->GetRadix() != 10; + llvm::APInt apint = literal->GetValue(); // Try int/unsigned int. 
- uint64_t int_byte_size = 0; - if (auto temp = - GetBasicType(type_system, lldb::eBasicTypeInt).GetByteSize(ctx.get())) - int_byte_size = *temp; - unsigned int_size = int_byte_size * CHAR_BIT; - llvm::APInt apint = literal->GetValue(); - if (!literal->IsLong() && !literal->IsLongLong() && apint.isIntN(int_size)) { - if (!literal->IsUnsigned() && apint.isIntN(int_size - 1)) + llvm::Expected int_size = + GetBasicType(type_system, lldb::eBasicTypeInt).GetBitSize(ctx.get()); + if (!int_size) + return int_size.takeError(); + if (literal->GetTypeSuffix() == IntegerTypeSuffix::None && + apint.isIntN(*int_size)) { + if (!literal->IsUnsigned() && apint.isIntN(*int_size - 1)) return GetBasicType(type_system, lldb::eBasicTypeInt); if (unsigned_is_allowed) return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt); } // Try long/unsigned long. - uint64_t long_byte_size = 0; - if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLong) - .GetByteSize(ctx.get())) - long_byte_size = *temp; - unsigned long_size = long_byte_size * CHAR_BIT; - if (!literal->IsLongLong() && apint.isIntN(long_size)) { - if (!literal->IsUnsigned() && apint.isIntN(long_size - 1)) + llvm::Expected long_size = + GetBasicType(type_system, lldb::eBasicTypeLong).GetBitSize(ctx.get()); + if (!long_size) + return long_size.takeError(); + if (literal->GetTypeSuffix() != IntegerTypeSuffix::LongLong && + apint.isIntN(*long_size)) { + if (!literal->IsUnsigned() && apint.isIntN(*long_size - 1)) return GetBasicType(type_system, lldb::eBasicTypeLong); if (unsigned_is_allowed) return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong); } // Try long long/unsigned long long. 
- uint64_t long_long_byte_size = 0; - if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLongLong) - .GetByteSize(ctx.get())) - long_long_byte_size = *temp; - unsigned long_long_size = long_long_byte_size * CHAR_BIT; - if (apint.isIntN(long_long_size)) { - if (!literal->IsUnsigned() && apint.isIntN(long_long_size - 1)) + llvm::Expected long_long_size = + GetBasicType(type_system, lldb::eBasicTypeLongLong).GetBitSize(ctx.get()); + if (!long_long_size) + return long_long_size.takeError(); + if (apint.isIntN(*long_long_size)) { + if (!literal->IsUnsigned() && apint.isIntN(*long_long_size - 1)) return GetBasicType(type_system, lldb::eBasicTypeLongLong); // If we still couldn't decide a type, we probably have something that // does not fit in a signed long long, but has no U suffix. Also known as: @@ -582,31 +576,40 @@ Interpreter::PickIntegerType(lldb::TypeSystemSP type_system, llvm::Expected Interpreter::Visit(const IntegerLiteralNode *node) { - auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope); + llvm::Expected type_system = + GetTypeSystemFromCU(m_exe_ctx_scope); if (!type_system) - return llvm::make_error( - m_expr, "unable to create a const literal", node->GetLocation()); + return type_system.takeError(); - auto type = PickIntegerType(type_system, m_exe_ctx_scope, node); - if (type) { - Scalar scalar = node->GetValue(); - // APInt from StringRef::getAsInteger comes with just enough bitwidth to - // hold the value. This adjusts APInt bitwidth to match the compiler type. - auto type_bitsize = type->GetBitSize(m_exe_ctx_scope.get()); - if (type_bitsize) - scalar.TruncOrExtendTo(*type_bitsize, false); - return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type, - "result"); - } else + llvm::Expected type = + PickIntegerType(*type_system, m_exe_ctx_scope, node); + if (!type) return type.takeError(); + + Scalar scalar = node->GetValue(); + // APInt from StringRef::getAsInteger comes with just enough bitwidth to + // hold the value. 
This adjusts APInt bitwidth to match the compiler type. + llvm::Expected type_bitsize = + type->GetBitSize(m_exe_ctx_scope.get()); + if (!type_bitsize) + return type_bitsize.takeError(); + scalar.TruncOrExtendTo(*type_bitsize, false); + return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type, + "result"); } llvm::Expected Interpreter::Visit(const FloatLiteralNode *node) { - auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope); + llvm::Expected type_system = + GetTypeSystemFromCU(m_exe_ctx_scope); + if (!type_system) + return type_system.takeError(); + + bool isFloat = + &node->GetValue().getSemantics() == &llvm::APFloat::IEEEsingle(); lldb::BasicType basic_type = - node->IsFloat() ? lldb::eBasicTypeFloat : lldb::eBasicTypeDouble; - CompilerType type = GetBasicType(type_system, basic_type); + isFloat ? lldb::eBasicTypeFloat : lldb::eBasicTypeDouble; + CompilerType type = GetBasicType(*type_system, basic_type); if (!type) return llvm::make_error( diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index 241bc84462c88..50e5d5cd00d14 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -386,7 +386,7 @@ ASTNodeUP DILParser::ParseNumericLiteral() { numeric_constant = ParseIntegerLiteral(); else numeric_constant = ParseFloatingPointLiteral(); - if (numeric_constant->GetKind() == NodeKind::eErrorNode) { + if (!numeric_constant) { BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}", CurToken()), CurToken().GetLocation(), CurToken().GetSpelling().length()); @@ -402,25 +402,23 @@ ASTNodeUP DILParser::ParseIntegerLiteral() { llvm::StringRef spelling_ref = spelling; auto radix = llvm::getAutoSenseRadix(spelling_ref); - bool is_unsigned = false, is_long = false, is_longlong = false; + IntegerTypeSuffix type = IntegerTypeSuffix::None; + bool is_unsigned = false; if (spelling_ref.consume_back_insensitive("u")) is_unsigned = true; if 
(spelling_ref.consume_back_insensitive("ll")) - is_longlong = true; - if (spelling_ref.consume_back_insensitive("l")) - is_long = true; + type = IntegerTypeSuffix::LongLong; + else if (spelling_ref.consume_back_insensitive("l")) + type = IntegerTypeSuffix::Long; // Suffix 'u' can be only specified only once, before or after 'l' if (!is_unsigned && spelling_ref.consume_back_insensitive("u")) is_unsigned = true; llvm::APInt raw_value; - if (!spelling_ref.getAsInteger(radix, raw_value)) { - Scalar scalar_value(raw_value); + if (!spelling_ref.getAsInteger(radix, raw_value)) return std::make_unique(token.GetLocation(), raw_value, - radix, is_unsigned, is_long, - is_longlong); - } - return std::make_unique(); + radix, is_unsigned, type); + return nullptr; } ASTNodeUP DILParser::ParseFloatingPointLiteral() { @@ -428,20 +426,15 @@ ASTNodeUP DILParser::ParseFloatingPointLiteral() { auto spelling = token.GetSpelling(); llvm::StringRef spelling_ref = spelling; - bool is_float = false; llvm::APFloat raw_float(llvm::APFloat::IEEEdouble()); - if (spelling_ref.consume_back_insensitive("f")) { - is_float = true; + if (spelling_ref.consume_back_insensitive("f")) raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle()); - } auto StatusOrErr = raw_float.convertFromString( spelling_ref, llvm::APFloat::rmNearestTiesToEven); - if (!errorToBool(StatusOrErr.takeError())) { - return std::make_unique(token.GetLocation(), raw_float, - is_float); - } - return std::make_unique(); + if (!errorToBool(StatusOrErr.takeError())) + return std::make_unique(token.GetLocation(), raw_float); + return nullptr; } void DILParser::Expect(Token::Kind kind) { diff --git a/lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py b/lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py index 7fbfb515035cd..0a603c56337c2 100644 --- a/lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py +++ 
b/lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py @@ -31,6 +31,11 @@ def test_literals(self): self.expect_var_path("0o1ll", value="1", type="long long") self.expect_var_path("0x1ULL", value="1", type="unsigned long long") self.expect_var_path("0x1llu", value="1", type="unsigned long long") + self.expect( + "frame var '1LLL'", + error=True, + substrs=["Failed to parse token as numeric-constant"], + ) self.expect( "frame var '1ullu'", error=True, From a17268610c075b50a86e248d684ade377a5870f1 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Thu, 14 Aug 2025 21:20:17 +0500 Subject: [PATCH 12/17] Expand float lexing --- lldb/source/ValueObject/DILLexer.cpp | 10 ++++++---- lldb/unittests/ValueObject/DILLexerTests.cpp | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index fd6be1ec89063..fee93b7a2a50c 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -83,8 +83,6 @@ static std::optional IsNumber(llvm::StringRef &remainder, bool &isFloat) { llvm::StringRef tail = remainder; llvm::StringRef body = tail.take_while(IsNumberBodyChar); - if (body.empty()) - return std::nullopt; size_t dots = body.count('.'); if (dots > 1 || dots == body.size()) return std::nullopt; @@ -93,13 +91,17 @@ static std::optional IsNumber(llvm::StringRef &remainder, char last = body.back(); tail = tail.drop_front(body.size()); if (last == 'e' || last == 'E' || last == 'p' || last == 'P') { - if (!tail.empty() && (tail.front() == '+' || tail.front() == '-')) { - tail = tail.drop_front(); + if (tail.consume_front("+") || tail.consume_front("-")) { tail = tail.drop_while(IsNumberBodyChar); + isFloat = true; } } size_t number_length = remainder.size() - tail.size(); llvm::StringRef number = remainder.take_front(number_length); + if (!isFloat) { + isFloat = number.contains('p') || // 0x1p1 = 2.0 + (!number.contains('x') && 
number.contains('e')); // 1e1 = 10.0 + } remainder = remainder.drop_front(number_length); return number; } diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index 02b6062a604ce..66413c3aa9032 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -159,8 +159,9 @@ TEST(DILLexerTests, NumbersTest) { // These strings should lex into number tokens. std::vector valid_integers = {"123", "0x123", "0123", "0b101"}; std::vector valid_floats = { - "1.2", ".2", "2.f", "0x1.2", "0x.2", ".2e1f", - "2.e+1f", "0x1.f", "0x1.2p1", "0x1.p-1f", "0x1.2p+3f"}; + "1.2", ".2", "2.f", "0x1.2", "0x.2", ".2e1f", + "2.e+1f", "0x1.f", "0x1.2p1", "0x1.p-1f", "0x1.2p+3f", "1e1", + "1e+1", "0x1p1", "0x1p+1", "0xf.fp1f"}; // The lexer can lex these strings, but they should not be numbers. std::vector invalid_numbers = {"", "x123", "b123", "a.b"}; From 1882999c82de010b40a0d759787e5f1a779aa304 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Thu, 14 Aug 2025 21:29:23 +0500 Subject: [PATCH 13/17] Add long Date: Sun, 17 Aug 2025 18:06:23 +0500 Subject: [PATCH 14/17] Add '0xe+1' case to lexing --- lldb/source/ValueObject/DILLexer.cpp | 18 ++++++++---------- lldb/unittests/ValueObject/DILLexerTests.cpp | 6 +++--- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index fee93b7a2a50c..a2544a8e22737 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -88,20 +88,18 @@ static std::optional IsNumber(llvm::StringRef &remainder, return std::nullopt; if (IsDigit(body.front()) || (body[0] == '.' 
&& IsDigit(body[1]))) { isFloat = dots == 1; - char last = body.back(); tail = tail.drop_front(body.size()); - if (last == 'e' || last == 'E' || last == 'p' || last == 'P') { - if (tail.consume_front("+") || tail.consume_front("-")) { - tail = tail.drop_while(IsNumberBodyChar); - isFloat = true; - } + bool isHex = body.contains_insensitive('x'); + bool hasExp = !isHex && body.contains_insensitive('e'); + bool hasHexExp = isHex && body.contains_insensitive('p'); + if (hasExp || hasHexExp) { + isFloat = true; // This marks numbers like 0x1p1 and 1e1 as float + if (body.ends_with_insensitive("e") || body.ends_with_insensitive("p")) + if (tail.consume_front("+") || tail.consume_front("-")) + tail = tail.drop_while(IsNumberBodyChar); } size_t number_length = remainder.size() - tail.size(); llvm::StringRef number = remainder.take_front(number_length); - if (!isFloat) { - isFloat = number.contains('p') || // 0x1p1 = 2.0 - (!number.contains('x') && number.contains('e')); // 1e1 = 10.0 - } remainder = remainder.drop_front(number_length); return number; } diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index 66413c3aa9032..67e97249dc658 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -160,8 +160,8 @@ TEST(DILLexerTests, NumbersTest) { std::vector valid_integers = {"123", "0x123", "0123", "0b101"}; std::vector valid_floats = { "1.2", ".2", "2.f", "0x1.2", "0x.2", ".2e1f", - "2.e+1f", "0x1.f", "0x1.2p1", "0x1.p-1f", "0x1.2p+3f", "1e1", - "1e+1", "0x1p1", "0x1p+1", "0xf.fp1f"}; + "2.e+1f", "0x1.f", "0x1.2P1", "0x1.p-1f", "0x1.2P+3f", "1E1", + "1E+1", "0x1p1", "0x1p+1", "0xf.fp1f"}; // The lexer can lex these strings, but they should not be numbers. 
std::vector invalid_numbers = {"", "x123", "b123", "a.b"}; @@ -192,7 +192,7 @@ TEST(DILLexerTests, NumbersTest) { // Verify that '-' and '+' are not lexed if they're not part of a number std::vector expressions = {"1+e", "0x1+p", "1.1+e", "1.1e1+e", "0x1.1p-1-p", "1e-1+e", - "1e1+e", "0x1p-1-p"}; + "1e1+e", "0x1p-1-p", "0xe+e"}; for (auto &str : expressions) { SCOPED_TRACE(str); llvm::Expected maybe_lexer = DILLexer::Create(str); From cd9dfc66c4e89e8474476c5e3d269070dd882fec Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Sun, 17 Aug 2025 18:27:53 +0500 Subject: [PATCH 15/17] Code cleanup --- lldb/docs/dil-expr-lang.ebnf | 3 ++- lldb/include/lldb/ValueObject/DILLexer.h | 2 +- lldb/source/ValueObject/DILEval.cpp | 6 ++---- lldb/source/ValueObject/DILLexer.cpp | 6 +++--- lldb/source/ValueObject/DILParser.cpp | 2 +- .../basics/ArraySubscript/TestFrameVarDILArraySubscript.py | 2 +- lldb/unittests/ValueObject/DILLexerTests.cpp | 4 ++-- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf index da1796d936c6a..67328939ba420 100644 --- a/lldb/docs/dil-expr-lang.ebnf +++ b/lldb/docs/dil-expr-lang.ebnf @@ -15,7 +15,8 @@ postfix_expression = primary_expression | postfix_expression "." 
id_expression | postfix_expression "->" id_expression ; -primary_expression = id_expression +primary_expression = numeric_literal + | id_expression | "(" expression ")" ; id_expression = unqualified_id diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h index cbffb6375778c..4345e6ce7f26b 100644 --- a/lldb/include/lldb/ValueObject/DILLexer.h +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -28,7 +28,7 @@ class Token { arrow, coloncolon, eof, - floating_constant, + float_constant, identifier, integer_constant, l_paren, diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index b8f0279c6398d..86eb902603047 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -512,11 +512,9 @@ GetTypeSystemFromCU(std::shared_ptr ctx) { static CompilerType GetBasicType(lldb::TypeSystemSP type_system, lldb::BasicType basic_type) { if (type_system) - if (auto compiler_type = type_system.get()->GetBasicTypeFromAST(basic_type)) - return compiler_type; + return type_system.get()->GetBasicTypeFromAST(basic_type); - CompilerType empty_type; - return empty_type; + return CompilerType(); } llvm::Expected diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index a2544a8e22737..0b2288a9d9230 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -28,8 +28,8 @@ llvm::StringRef Token::GetTokenName(Kind kind) { return "coloncolon"; case Kind::eof: return "eof"; - case Kind::floating_constant: - return "floating_constant"; + case Kind::float_constant: + return "float_constant"; case Kind::identifier: return "identifier"; case Kind::integer_constant: @@ -133,7 +133,7 @@ llvm::Expected DILLexer::Lex(llvm::StringRef expr, bool isFloat = false; std::optional maybe_number = IsNumber(remainder, isFloat); if (maybe_number) { - auto kind = isFloat ? 
Token::floating_constant : Token::integer_constant; + auto kind = isFloat ? Token::float_constant : Token::integer_constant; return Token(kind, maybe_number->str(), position); } std::optional maybe_word = IsWord(expr, remainder); diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index 50e5d5cd00d14..8c4f7fdb25bea 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -184,7 +184,7 @@ ASTNodeUP DILParser::ParsePostfixExpression() { // "(" expression ")" // ASTNodeUP DILParser::ParsePrimaryExpression() { - if (CurToken().IsOneOf({Token::integer_constant, Token::floating_constant})) + if (CurToken().IsOneOf({Token::integer_constant, Token::float_constant})) return ParseNumericLiteral(); if (CurToken().IsOneOf( {Token::coloncolon, Token::identifier, Token::l_paren})) { diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py index 9c0d9ec252728..cd67aa376135d 100644 --- a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py +++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py @@ -66,7 +66,7 @@ def test_subscript(self): self.expect( "frame var 'int_arr[1.0]'", error=True, - substrs=["failed to parse integer constant: <'1.0' (floating_constant)>"], + substrs=["failed to parse integer constant: <'1.0' (float_constant)>"], ) # Test accessing bits in scalar types. 
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index 67e97249dc658..5c78b82f822cf 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -176,7 +176,7 @@ TEST(DILLexerTests, NumbersTest) { SCOPED_TRACE(str); EXPECT_THAT_EXPECTED(ExtractTokenData(str), llvm::HasValue(testing::ElementsAre( - testing::Pair(Token::floating_constant, str)))); + testing::Pair(Token::float_constant, str)))); } // Verify that none of the invalid numbers come out as numeric tokens. for (auto &str : invalid_numbers) { @@ -200,7 +200,7 @@ TEST(DILLexerTests, NumbersTest) { DILLexer lexer(*maybe_lexer); Token token = lexer.GetCurrentToken(); EXPECT_TRUE( - token.IsOneOf({Token::integer_constant, Token::floating_constant})); + token.IsOneOf({Token::integer_constant, Token::float_constant})); lexer.Advance(); token = lexer.GetCurrentToken(); EXPECT_TRUE(token.IsOneOf({Token::plus, Token::minus})); From 8d44b9df6ebf8b05ddc86a967354ab900cae3aa7 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Sun, 17 Aug 2025 19:23:58 +0500 Subject: [PATCH 16/17] Refactor PickIntegerType --- lldb/source/ValueObject/DILEval.cpp | 62 +++++++++++------------------ 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index 86eb902603047..c6cf41ee9e9ee 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -526,46 +526,30 @@ Interpreter::PickIntegerType(lldb::TypeSystemSP type_system, bool unsigned_is_allowed = literal->IsUnsigned() || literal->GetRadix() != 10; llvm::APInt apint = literal->GetValue(); - // Try int/unsigned int. 
- llvm::Expected int_size = - GetBasicType(type_system, lldb::eBasicTypeInt).GetBitSize(ctx.get()); - if (!int_size) - return int_size.takeError(); - if (literal->GetTypeSuffix() == IntegerTypeSuffix::None && - apint.isIntN(*int_size)) { - if (!literal->IsUnsigned() && apint.isIntN(*int_size - 1)) - return GetBasicType(type_system, lldb::eBasicTypeInt); - if (unsigned_is_allowed) - return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt); - } - // Try long/unsigned long. - llvm::Expected long_size = - GetBasicType(type_system, lldb::eBasicTypeLong).GetBitSize(ctx.get()); - if (!long_size) - return long_size.takeError(); - if (literal->GetTypeSuffix() != IntegerTypeSuffix::LongLong && - apint.isIntN(*long_size)) { - if (!literal->IsUnsigned() && apint.isIntN(*long_size - 1)) - return GetBasicType(type_system, lldb::eBasicTypeLong); - if (unsigned_is_allowed) - return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong); - } - // Try long long/unsigned long long. - llvm::Expected long_long_size = - GetBasicType(type_system, lldb::eBasicTypeLongLong).GetBitSize(ctx.get()); - if (!long_long_size) - return long_long_size.takeError(); - if (apint.isIntN(*long_long_size)) { - if (!literal->IsUnsigned() && apint.isIntN(*long_long_size - 1)) - return GetBasicType(type_system, lldb::eBasicTypeLongLong); - // If we still couldn't decide a type, we probably have something that - // does not fit in a signed long long, but has no U suffix. Also known as: - // - // warning: integer literal is too large to be represented in a signed - // integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal] - // - return GetBasicType(type_system, lldb::eBasicTypeUnsignedLongLong); + llvm::SmallVector, 3> candidates; + if (literal->GetTypeSuffix() <= IntegerTypeSuffix::None) + candidates.emplace_back(lldb::eBasicTypeInt, + unsigned_is_allowed ? 
lldb::eBasicTypeUnsignedInt + : lldb::eBasicTypeInvalid); + if (literal->GetTypeSuffix() <= IntegerTypeSuffix::Long) + candidates.emplace_back(lldb::eBasicTypeLong, + unsigned_is_allowed ? lldb::eBasicTypeUnsignedLong + : lldb::eBasicTypeInvalid); + candidates.emplace_back(lldb::eBasicTypeLongLong, + lldb::eBasicTypeUnsignedLongLong); + for (auto [signed_, unsigned_] : candidates) { + CompilerType signed_type = type_system->GetBasicTypeFromAST(signed_); + if (!signed_type) + continue; + llvm::Expected size = signed_type.GetBitSize(ctx.get()); + if (!size) + return size.takeError(); + if (!literal->IsUnsigned() && apint.isIntN(*size - 1)) + return signed_type; + if (unsigned_ != lldb::eBasicTypeInvalid && apint.isIntN(*size)) + return type_system->GetBasicTypeFromAST(unsigned_); } + return llvm::make_error( m_expr, "integer literal is too large to be represented in any integer type", From 0a7aa2b4e80ee571bd72403b9cd77e3cfb83d245 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Wed, 27 Aug 2025 15:21:19 +0500 Subject: [PATCH 17/17] Fix a test after rebasing --- .../ArraySubscript/TestFrameVarDILArraySubscript.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py index cd67aa376135d..f47e86266f474 100644 --- a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py +++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py @@ -72,8 +72,9 @@ def test_subscript(self): # Test accessing bits in scalar types. self.expect_var_path("idx_1[0]", value="1") self.expect_var_path("idx_1[1]", value="0") + self.expect_var_path("1[0]", value="1") - # Bit adcess not valid for a reference. + # Bit access not valid for a reference. 
self.expect( "frame var 'idx_1_ref[0]'", error=True, @@ -86,11 +87,6 @@ def test_subscript(self): error=True, substrs=["failed to parse integer constant"], ) - self.expect( - "frame var '1[2]'", - error=True, - substrs=["subscripted value is not an array or pointer"], - ) # Base should not be a pointer to void self.expect(