Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions lldb/docs/dil-expr-lang.ebnf
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
(* This is currently a subset of the final DIL Language, matching the current
DIL implementation. *)

expression = unary_expression ;
expression = cast_expression;

cast_expression = unary_expression
| "(" type_id ")" cast_expression;

unary_expression = postfix_expression
| unary_operator expression ;
| unary_operator cast_expression ;

unary_operator = "*" | "&" ;

Expand Down Expand Up @@ -44,10 +47,28 @@ nested_name_specifier = type_name "::"
| namespace_name '::'
| nested_name_specifier identifier "::" ;

type_id = type_specifier_seq [abstract_declarator] ;

type_specifier_seq = type_specifier [type_specifier];

type_specifier = ["::"] [nested_name_specifier] type_name
| builtin_typename ;

nested_name_specifier = type_name "::"
| namespace_name "::"
| nested_name_specifier identifier "::" ;

abstract_declarator = ptr_operator [abstract_declarator] ;

ptr_operator = "*"
| "&";

type_name = class_name
| enum_name
| typedef_name;

builtin_typename = identifier_seq;

class_name = identifier ;

enum_name = identifier ;
Expand All @@ -56,6 +77,7 @@ typedef_name = identifier ;

namespace_name = identifier ;


identifier_seq = identifier
| identifier identifier_seq;


34 changes: 34 additions & 0 deletions lldb/include/lldb/ValueObject/DILAST.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ enum class NodeKind {
eArraySubscriptNode,
eBitExtractionNode,
eBooleanLiteralNode,
eCastNode,
eErrorNode,
eFloatLiteralNode,
eIdentifierNode,
Expand All @@ -35,6 +36,14 @@ enum class UnaryOpKind {
Deref, // "*"
};

/// The type casts allowed by DIL.
enum class CastKind {
eEnumeration, ///< Casting from a scalar to an enumeration type
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doxygen syntax is either

/// Casting from a scalar to an enumeration type.
eEnumeration, 

or

eEnumeration, ///< Casting from a scalar to an enumeration type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! I didn't know that. Thanks! (Fixed).

eNullptr, ///< Casting to a nullptr type
eReference, ///< Casting to a reference type
eNone, ///< Type promotion casting
};

/// Forward declaration, for use in DIL AST nodes. Definition is at the very
/// end of this file.
class Visitor;
Expand Down Expand Up @@ -244,6 +253,29 @@ class BooleanLiteralNode : public ASTNode {
bool m_value;
};

/// AST node for a cast expression of the form `"(" type_id ")" operand`.
///
/// Holds the type named inside the parentheses, the operand expression the
/// cast is applied to, and the CastKind the node was created with.
class CastNode : public ASTNode {
public:
/// \param location Source location forwarded to the ASTNode base.
/// \param type     Type stored as the target of the cast.
/// \param operand  Operand expression; ownership is moved into this node.
/// \param kind     Cast kind stored with the node.
CastNode(uint32_t location, CompilerType type, ASTNodeUP operand,
CastKind kind)
: ASTNode(location, NodeKind::eCastNode), m_type(type),
m_operand(std::move(operand)), m_cast_kind(kind) {}

/// Hands this node to \p v by calling its Visit(const CastNode *) overload.
llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;

/// Returns a copy of the stored target type.
CompilerType GetType() const { return m_type; }
/// Returns a non-owning pointer to the operand; the node keeps ownership.
ASTNode *GetOperand() const { return m_operand.get(); }
/// Returns the cast kind given at construction.
CastKind GetCastKind() const { return m_cast_kind; }

/// Returns true when \p node reports NodeKind::eCastNode (kind test in the
/// LLVM `classof` style).
static bool classof(const ASTNode *node) {
return node->GetKind() == NodeKind::eCastNode;
}

private:
CompilerType m_type;      // Type named in the cast.
ASTNodeUP m_operand;      // Owned operand expression.
CastKind m_cast_kind;     // Kind of cast recorded for this node.
};

/// This class contains one Visit method for each specialized type of
/// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
/// the correct function in the DIL expression evaluator for evaluating that
Expand All @@ -267,6 +299,8 @@ class Visitor {
Visit(const FloatLiteralNode *node) = 0;
virtual llvm::Expected<lldb::ValueObjectSP>
Visit(const BooleanLiteralNode *node) = 0;
/// Visit method for CastNode; implemented by concrete visitors.
virtual llvm::Expected<lldb::ValueObjectSP>
Visit(const CastNode *node) = 0;
};

} // namespace lldb_private::dil
Expand Down
2 changes: 2 additions & 0 deletions lldb/include/lldb/ValueObject/DILEval.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ class Interpreter : Visitor {
Visit(const FloatLiteralNode *node) override;
llvm::Expected<lldb::ValueObjectSP>
Visit(const BooleanLiteralNode *node) override;
/// Interpreter's override of the Visitor method for CastNode.
llvm::Expected<lldb::ValueObjectSP>
Visit(const CastNode *node) override;

llvm::Expected<CompilerType>
PickIntegerType(lldb::TypeSystemSP type_system,
Expand Down
6 changes: 6 additions & 0 deletions lldb/include/lldb/ValueObject/DILParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ class DILParser {
ASTNodeUP ParseFloatingPointLiteral();
ASTNodeUP ParseBooleanLiteral();

/// Parses a cast_expression: either a unary_expression or
/// `"(" type_id ")" cast_expression`.
ASTNodeUP ParseCastExpression();
/// Parses a builtin type name made of one or more identifiers (e.g.
/// "long long"). Returns an empty optional when no builtin type matches.
std::optional<CompilerType> ParseBuiltinType();
/// Parses a type_id: currently a builtin type followed by any number of
/// "*" / "&" tokens. Returns an empty optional when the input does not start
/// with a builtin type.
std::optional<CompilerType> ParseTypeId();
/// Applies the "*" / "&" tokens in \p ptr_operators, in order, to \p type.
/// Returns a default-constructed CompilerType after reporting an error when
/// an operator is applied to a reference type.
CompilerType ResolveTypeDeclarators(CompilerType type,
const std::vector<Token> &ptr_operators);

void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);

void Expect(Token::Kind kind);
Expand Down
4 changes: 4 additions & 0 deletions lldb/source/ValueObject/DILAST.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,8 @@ BooleanLiteralNode::Accept(Visitor *v) const {
return v->Visit(this);
}

/// Dispatches this node to the visitor's Visit(const CastNode *) overload and
/// returns whatever that call produces.
llvm::Expected<lldb::ValueObjectSP> CastNode::Accept(Visitor *v) const {
return v->Visit(this);
}

} // namespace lldb_private::dil
12 changes: 12 additions & 0 deletions lldb/source/ValueObject/DILEval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,4 +608,16 @@ Interpreter::Visit(const BooleanLiteralNode *node) {
return ValueObject::CreateValueObjectFromBool(m_target, value, "result");
}

llvm::Expected<lldb::ValueObjectSP>
Interpreter::Visit(const CastNode *node) {
auto operand_or_err = Evaluate(node->GetOperand());
if (!operand_or_err)
return operand_or_err;

lldb::ValueObjectSP operand = *operand_or_err;
// Don't actually do the cast for now -- that code will be added later.
// For now just return the original operand, unchanged.
return operand;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we aim to merge this PR separately? If so, this should return an unimplemented error (or something like that) for now, so that the calling code can fall back to full expression evaluator.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we plan to merge it separately. I don't think we need to advertise that it doesn't work properly yet (since we're not advertising that this feature is going in at all). So I'm ok with 'type casting' silently doing nothing for now. But if others disagree I could return an error here for now...

Other opinions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just that, since the frame var implementation is used for expression evaluation in lldb-dap, if a user writes an expression with a cast, but DIL doesn't do it and returns some value, that will be a wrong value the user didn't expect. If DIL returns an error, the calling code will instead use a full expression evaluator. For now we can just return the same parser error as before this patch, to avoid confusing people who might read the error message.

Copy link
Collaborator

@jimingham jimingham Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm worried about this terminology. frame var is not and should not be expression evaluation. It's for static inspection of memory, it doesn't do operator overloads in general, and doesn't construct temporary objects or call copy constructors or anything like that. It just looks at static values in memory. We should not give the impression that frame var is the expression evaluator.

This project started because some folks wanted to make it easier to write data formatters (which should be non-code-running for efficiency & stability) using "value path expressions" that required some operations we didn't support in the frame var language. In that use case, presumably the authors know what they are doing and won't confuse DIL with the real language specific expression evaluator.

Even dwim-print requires context in this regard. After all, an -> access of a value that has an -> overload is going to be different in dwim-print. That command works for common cases but really does require the user to know what they are doing, since there will be cases where both frame var and expr return valid but different results.
So it seems to me we should be careful of anything we say or do that confuses the two.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even dwim-print requires context in this regard. After all, an -> access of a value that has an -> overload is going to be different in dwim-print. That command works for common cases but really does require the user to know what they are doing, since there will be cases where both frame var and expr return valid but different results.
So it seems to me we should be careful of anything we say or do that confuses the two.

I haven't really thought about that. We already replaced old frame var with DIL, which is fine for now since it has basically the same functionality, but will become more of a problem as we add more operators. Compared to dwim-print, lldb-dap doesn't do any checks and just tries to use frame var for expressions, which means this was an existing problem when a -> appeared. Even with the old implementation, the overloaded operator wouldn't be used.

I guess we'll have to think again if this was a correct approach to replace frame var, or if we should just add the same guards dwim-print does to lldb-dap. But I personally really hoped this would just speed up expression evaluation for everyone.

Also, in DIL we could potentially implement overloaded operators, at least with ABI calls, I don't know how limited or accurate this would be though.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can implement an overloaded -> operator using a synthetic child provider already if you want to.

But I actually think it's a good thing not a problem to have a "values in memory" view of values and a "how the programming language would present them" view of values. After all, if your -> operator is misbehaving, or is not letting you look at the value because it doesn't pass some check, you still need some way to just see the dereference in memory.

So long as we are clear about: "frame var shows values in memory" and "expr accesses values as the programming language would" I think this is a useful and understandable split. We also have dwim-print for people that don't want to be bothered with this distinction and happy to let lldb choose the most useful representation for them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds reasonable. But I would like to have an option to use DIL for expressions anyway, maybe we can just add another LLDB setting.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning (no pun intended) to the original question here: Is it OK, for now, for this function to return the unchanged operand? Especially because the PR to do the actual type casting will be sent for review as soon as this one lands?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In an offline conversation, kuilpd convinced me that this should return an error for now, since it's not actually doing the typecast. I have updated the code to do that.

}

} // namespace lldb_private::dil
167 changes: 163 additions & 4 deletions lldb/source/ValueObject/DILParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//

#include "lldb/ValueObject/DILParser.h"
#include "lldb/Symbol/CompileUnit.h"
#include "lldb/Target/ExecutionContextScope.h"
#include "lldb/Target/LanguageRuntime.h"
#include "lldb/Utility/DiagnosticsRendering.h"
#include "lldb/ValueObject/DILAST.h"
#include "lldb/ValueObject/DILEval.h"
Expand Down Expand Up @@ -80,15 +82,63 @@ ASTNodeUP DILParser::Run() {
// Parse an expression.
//
// expression:
// unary_expression
// cast_expression
//
ASTNodeUP DILParser::ParseExpression() { return ParseUnaryExpression(); }
ASTNodeUP DILParser::ParseExpression() { return ParseCastExpression(); }

// Parse a cast_expression.
//
//  cast_expression:
//    unary_expression
//    "(" type_id ")" cast_expression
//
// A leading "(" is ambiguous: it may open a cast or a parenthesized
// expression. The contents are first tried as a type_id; if that does not
// produce a type, the lexer is rewound to the "(" and the input is parsed as
// a unary_expression instead.
ASTNodeUP DILParser::ParseCastExpression() {
  // Without a leading "(" this cannot be a cast.
  if (!CurToken().Is(Token::l_paren))
    return ParseUnaryExpression();

  // Record the "(": its location labels the resulting CastNode and its token
  // index is the rollback point if this turns out not to be a cast.
  const uint32_t cast_loc = CurToken().GetLocation();
  const uint32_t rollback_idx = m_dil_lexer.GetCurrentTokenIdx();

  // Step over the "(" and tentatively read a type declaration.
  m_dil_lexer.Advance();
  const auto parsed_type = ParseTypeId();

  if (!parsed_type) {
    // The parenthesized contents are not a type declaration. Rewind and let
    // the unary_expression rule handle the input.
    TentativeParsingRollback(rollback_idx);
    return ParseUnaryExpression();
  }

  // A type declaration was recognized, so this is committed to being a cast.
  // An invalid type at this point means the declaration itself was rejected.
  if (!parsed_type->IsValid())
    return std::make_unique<ErrorNode>();

  Expect(Token::r_paren);
  m_dil_lexer.Advance();
  ASTNodeUP operand = ParseCastExpression();

  return std::make_unique<CastNode>(cast_loc, *parsed_type,
                                    std::move(operand), CastKind::eNone);
}

// Parse an unary_expression.
//
// unary_expression:
// postfix_expression
// unary_operator expression
// unary_operator cast_expression
//
// unary_operator:
// "&"
Expand All @@ -99,7 +149,7 @@ ASTNodeUP DILParser::ParseUnaryExpression() {
Token token = CurToken();
uint32_t loc = token.GetLocation();
m_dil_lexer.Advance();
auto rhs = ParseExpression();
auto rhs = ParseCastExpression();
switch (token.GetKind()) {
case Token::star:
return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Deref,
Expand Down Expand Up @@ -274,6 +324,81 @@ std::string DILParser::ParseNestedNameSpecifier() {
}
}

// Parse a type_id.
//
// type_id:
// type_specifier_seq [abstract_declarator]
//
// type_specifier_seq:
// type_specifier [type_specifier]
//
// type_specifier:
// ["::"] [nested_name_specifier] type_name // not handled for now!
// builtin_typename
//
std::optional<CompilerType> DILParser::ParseTypeId() {
CompilerType type;
// For now only allow builtin types -- will expand add to this later.
auto maybe_builtin_type = ParseBuiltinType();
if (maybe_builtin_type) {
type = *maybe_builtin_type;
} else
return {};

//
// abstract_declarator:
// ptr_operator [abstract_declarator]
//
std::vector<Token> ptr_operators;
while (CurToken().IsOneOf({Token::star, Token::amp})) {
Token tok = CurToken();
ptr_operators.push_back(std::move(tok));
m_dil_lexer.Advance();
}
type = ResolveTypeDeclarators(type, ptr_operators);

return type;
}

// Parse a built-in type.
//
//  builtin_typename:
//    identifier_seq
//
//  identifier_seq:
//    identifier [identifier_seq]
//
// A built-in type can be a single identifier or a space-separated
// list of identifiers (e.g. "short" or "long long"). The qualifiers "const"
// and "volatile" are consumed but left out of the name that is looked up.
//
// Returns the first builtin type with the collected name offered by one of
// the target's scratch type systems. If there is no such type (or no target),
// the lexer is rewound to where it was on entry and an empty optional is
// returned.
std::optional<CompilerType> DILParser::ParseBuiltinType() {
  const uint32_t save_token_idx = m_dil_lexer.GetCurrentTokenIdx();

  std::string type_name;
  // The lexer is advanced in the loop increment so that every path through
  // the body -- including the qualifier `continue` below -- consumes the
  // current token. Skipping a qualifier without advancing would leave
  // CurToken() unchanged and the loop would never terminate.
  for (; CurToken().GetKind() == Token::identifier; m_dil_lexer.Advance()) {
    const std::string spelling(CurToken().GetSpelling());
    if (spelling == "const" || spelling == "volatile")
      continue;
    if (!type_name.empty())
      type_name.push_back(' ');
    type_name.append(spelling);
  }

  if (!type_name.empty()) {
    lldb::TargetSP target_sp = m_ctx_scope->CalculateTarget();
    // Without a target there are no type systems to ask; fall through to the
    // rollback below instead of dereferencing a null pointer.
    if (target_sp) {
      ConstString const_type_name(type_name.c_str());
      for (const auto &type_system_sp : target_sp->GetScratchTypeSystems())
        if (auto compiler_type =
                type_system_sp->GetBuiltinTypeByName(const_type_name))
          return compiler_type;
    }
  }

  // Not a builtin type: undo any tokens consumed above.
  TentativeParsingRollback(save_token_idx);
  return {};
}

// Parse an id_expression.
//
// id_expression:
Expand Down Expand Up @@ -339,6 +464,40 @@ std::string DILParser::ParseUnqualifiedId() {
return identifier;
}

CompilerType
DILParser::ResolveTypeDeclarators(CompilerType type,
const std::vector<Token> &ptr_operators) {
// Resolve pointers/references.
for (Token tk : ptr_operators) {
uint32_t loc = tk.GetLocation();
if (tk.GetKind() == Token::star) {
// Pointers to reference types are forbidden.
if (type.IsReferenceType()) {
BailOut(llvm::formatv("'type name' declared as a pointer to a "
"reference of type {0}",
type.TypeDescription()),
loc, CurToken().GetSpelling().length());
return {};
}
// Get pointer type for the base type: e.g. int* -> int**.
type = type.GetPointerType();

} else if (tk.GetKind() == Token::amp) {
// References to references are forbidden.
// FIXME: In future we may want to allow rvalue references (i.e. &&).
if (type.IsReferenceType()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a FIXME for rvalue references (i.e., &&)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

BailOut("type name declared as a reference to a reference", loc,
CurToken().GetSpelling().length());
return {};
}
// Get reference type for the base type: e.g. int -> int&.
type = type.GetLValueReferenceType();
}
}

return type;
}

// Parse an boolean_literal.
//
// boolean_literal:
Expand Down