From 01961abb3c6c77eefb7cb94ad73003c3c9dfb1af Mon Sep 17 00:00:00 2001 From: Roberto Raggi Date: Sun, 10 Aug 2025 11:16:33 +0200 Subject: [PATCH] Initial clean up of the compiler frontend driver --- src/frontend/CMakeLists.txt | 3 + src/frontend/cxx/check_expression_types.cc | 83 ++++ src/frontend/cxx/check_expression_types.h | 29 ++ src/frontend/cxx/cxx.cc | 65 ++++ src/frontend/cxx/dump_tokens.cc | 71 ++++ src/frontend/cxx/dump_tokens.h | 38 ++ src/frontend/cxx/frontend.cc | 429 +++++++++++---------- src/frontend/cxx/frontend.h | 93 +++++ src/parser/cxx/translation_unit.cc | 3 +- src/parser/cxx/translation_unit.h | 4 + 10 files changed, 604 insertions(+), 214 deletions(-) create mode 100644 src/frontend/cxx/check_expression_types.cc create mode 100644 src/frontend/cxx/check_expression_types.h create mode 100644 src/frontend/cxx/cxx.cc create mode 100644 src/frontend/cxx/dump_tokens.cc create mode 100644 src/frontend/cxx/dump_tokens.h create mode 100644 src/frontend/cxx/frontend.h diff --git a/src/frontend/CMakeLists.txt b/src/frontend/CMakeLists.txt index 6181afac..6f6bc933 100644 --- a/src/frontend/CMakeLists.txt +++ b/src/frontend/CMakeLists.txt @@ -19,8 +19,11 @@ add_executable(cxx + cxx/cxx.cc + cxx/dump_tokens.cc cxx/frontend.cc cxx/verify_diagnostics_client.cc + cxx/check_expression_types.cc ) target_link_libraries(cxx PRIVATE cxx-lsp) diff --git a/src/frontend/cxx/check_expression_types.cc b/src/frontend/cxx/check_expression_types.cc new file mode 100644 index 00000000..d295c265 --- /dev/null +++ b/src/frontend/cxx/check_expression_types.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2025 Roberto Raggi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "check_expression_types.h" + +#include +#include + +#include + +namespace cxx { + +namespace { + +class CheckExpressionTypes final : private ASTVisitor { + public: + [[nodiscard]] auto operator()(TranslationUnit* unit) { + std::size_t missingTypes = 0; + std::swap(unit_, unit); + std::swap(missingTypes_, missingTypes); + + accept(unit_->ast()); + + std::swap(unit_, unit); + std::swap(missingTypes_, missingTypes); + + return missingTypes == 0; + } + + private: + using ASTVisitor::visit; + + auto preVisit(AST* ast) -> bool override { + if (ast_cast(ast)) { + // skip template declarations, as they are not instantiated yet + return false; + } + + if (auto expression = ast_cast(ast)) { + if (!expression->type) { + const auto loc = expression->firstSourceLocation(); + + unit_->warning(loc, std::format("untyped expression of kind '{}'", + to_string(expression->kind()))); + + ++missingTypes_; + return false; + } + } + + return true; // visit children + } + + private: + TranslationUnit* unit_ = nullptr; + std::size_t missingTypes_ = 0; +}; + +} // namespace + +auto checkExpressionTypes(TranslationUnit& unit) -> bool { + CheckExpressionTypes checkExpressionTypes; + return checkExpressionTypes(&unit); +} + +} // namespace cxx \ No newline at end of file diff --git a/src/frontend/cxx/check_expression_types.h b/src/frontend/cxx/check_expression_types.h new file mode 100644 index 00000000..95daadde --- /dev/null +++ b/src/frontend/cxx/check_expression_types.h @@ -0,0 +1,29 @@ +// Copyright (c) 2025 Roberto Raggi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +namespace cxx { + +[[nodiscard]] auto checkExpressionTypes(TranslationUnit& unit) -> bool; + +} diff --git a/src/frontend/cxx/cxx.cc b/src/frontend/cxx/cxx.cc new file mode 100644 index 00000000..34c1f97b --- /dev/null +++ b/src/frontend/cxx/cxx.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2025 Roberto Raggi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include + +#include + +#include "frontend.h" + +auto main(int argc, char* argv[]) -> int { + cxx::CLI cli; + cli.parse(argc, argv); + + if (cli.opt_help) { + cli.showHelp(); + return EXIT_SUCCESS; + } + + const auto& inputFiles = cli.positionals(); + + if (cli.opt_lsp_test) { + cli.opt_lsp = true; + } + + if (!cli.opt_lsp && inputFiles.empty()) { + std::cerr << "cxx: no input files" << std::endl + << "Usage: cxx [options] file..." << std::endl; + return EXIT_FAILURE; + } + + if (cli.opt_lsp) { + auto server = cxx::lsp::Server{cli}; + + return server.start(); + } + + auto existStatus = EXIT_SUCCESS; + + for (const auto& fileName : inputFiles) { + cxx::Frontend runOnFile(cli, fileName); + + if (!runOnFile()) { + existStatus = EXIT_FAILURE; + } + } + + return existStatus; +} diff --git a/src/frontend/cxx/dump_tokens.cc b/src/frontend/cxx/dump_tokens.cc new file mode 100644 index 00000000..b22f0ac6 --- /dev/null +++ b/src/frontend/cxx/dump_tokens.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2025 Roberto Raggi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "dump_tokens.h" + +#include + +#include +#include + +namespace cxx { + +DumpTokens::DumpTokens(const CLI& cli) : cli(cli) {} + +void DumpTokens::operator()(TranslationUnit& unit, std::ostream& output) { + auto lang = LanguageKind::kCXX; + + if (auto x = cli.getSingle("x")) { + if (x == "c") lang = LanguageKind::kC; + } else if (unit.fileName().ends_with(".c")) { + lang = LanguageKind::kC; + } + + std::string flags; + + for (SourceLocation loc(1);; loc = loc.next()) { + const auto& tk = unit.tokenAt(loc); + + flags.clear(); + + if (tk.startOfLine()) { + flags += " [start-of-line]"; + } + + if (tk.leadingSpace()) { + flags += " [leading-space]"; + } + + auto kind = tk.kind(); + if (kind == TokenKind::T_IDENTIFIER) { + kind = Lexer::classifyKeyword(tk.spell(), lang); + } + + output << std::format("{} '{}'{}", Token::name(kind), tk.spell(), flags); + + auto pos = unit.tokenStartPosition(loc); + + output << std::format(" at {}:{}:{}\n", pos.fileName, pos.line, pos.column); + + if (tk.is(TokenKind::T_EOF_SYMBOL)) break; + } +} + +} // namespace cxx \ No newline at end of file diff --git a/src/frontend/cxx/dump_tokens.h b/src/frontend/cxx/dump_tokens.h new file mode 100644 index 00000000..664a7f26 --- /dev/null +++ b/src/frontend/cxx/dump_tokens.h @@ -0,0 +1,38 @@ +// Copyright (c) 2025 Roberto Raggi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace cxx { + +class DumpTokens { + public: + DumpTokens(const CLI& cli); + + void operator()(TranslationUnit& unit, std::ostream& output); + + private: + const CLI& cli; +}; + +} // namespace cxx diff --git a/src/frontend/cxx/frontend.cc b/src/frontend/cxx/frontend.cc index 78fc6531..368f64b6 100644 --- a/src/frontend/cxx/frontend.cc +++ b/src/frontend/cxx/frontend.cc @@ -18,6 +18,8 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +#include "frontend.h" + #include #include #include @@ -26,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -46,134 +47,162 @@ #include #include #include -#include -#include #include +#include "check_expression_types.h" +#include "dump_tokens.h" #include "verify_diagnostics_client.h" -namespace { +namespace cxx { + +Frontend::Frontend(const CLI& cli, std::string fileName) + : cli(cli), fileName_(std::move(fileName)) { + diagnosticsClient_ = std::make_unique(); + unit_ = std::make_unique(diagnosticsClient_.get()); + + actions_.emplace_back([this]() { showSearchPaths(std::cerr); }); + actions_.emplace_back([this]() { preprocess(); }); + actions_.emplace_back([this]() { printPreprocessedText(); }); + actions_.emplace_back([this]() { dumpMacros(std::cout); }); + actions_.emplace_back([this]() { dumpTokens(std::cout); }); + actions_.emplace_back([this]() { unit_->preprocessor()->squeeze(); }); + actions_.emplace_back([this]() { parse(); }); + actions_.emplace_back([this]() { dumpSymbols(std::cout); }); + actions_.emplace_back([this]() { dumpAst(); }); + actions_.emplace_back([this]() { printAstIfNeeded(); }); + actions_.emplace_back([this]() { serializeAst(); }); + actions_.emplace_back([this]() { emitIR(); }); +} -using namespace cxx; +Frontend::~Frontend() {} -class CheckExpressionTypes final : private ASTVisitor { - public: - [[nodiscard]] auto operator()(TranslationUnit* unit) { - std::size_t missingTypes = 0; - std::swap(unit_, unit); - std::swap(missingTypes_, missingTypes); +auto Frontend::translationUnit() const -> TranslationUnit* { + return unit_.get(); +} - accept(unit_->ast()); +auto Frontend::toolchain() const -> Toolchain* { return toolchain_.get(); } - std::swap(unit_, unit); - std::swap(missingTypes_, missingTypes); +auto Frontend::fileName() const -> const std::string& { return fileName_; } - return missingTypes == 0; - } +void Frontend::addAction(std::function action) { + actions_.emplace_back(std::move(action)); +} - private: - using ASTVisitor::visit; +auto Frontend::operator()() -> bool { + prepare(); + preparePreprocessor(); - auto preVisit(AST* ast) -> bool override { - if (ast_cast(ast)) { - // skip template declarations, as they are not instantiated yet - return false; - } + for (const auto& action : actions_) { + if (shouldExit_) break; + action(); + } - if (auto expression = ast_cast(ast)) { - if (!expression->type) { - const auto loc = expression->firstSourceLocation(); + diagnosticsClient_->verifyExpectedDiagnostics(); - unit_->warning(loc, std::format("untyped expression of kind '{}'", - to_string(expression->kind()))); + return !diagnosticsClient_->hasErrors(); +} - ++missingTypes_; - return false; - } - } +void Frontend::withOutputStream( + const std::optional& extension, + const std::function& action) { + auto explicitOutput = cli.getSingle("-o"); - return true; // visit children + if (explicitOutput == "-" || (!explicitOutput.has_value() && + (!extension.has_value() || fileName_ == "-"))) { + action(std::cout); + return; } - private: - TranslationUnit* unit_ = nullptr; - std::size_t missingTypes_ = 0; -}; + auto inputFile = fs::path{fileName_}.filename(); + auto defaultOutputFile = inputFile.replace_extension(*extension); -auto readAll(const std::string& fileName, std::istream& in) - -> std::optional { - std::string code; - char buffer[4 * 1024]; - do { - in.read(buffer, sizeof(buffer)); - code.append(buffer, in.gcount()); - } while (in); - return code; -} + auto outputFile = cli.getSingle("-o").value_or(defaultOutputFile.string()); -auto readAll(const std::string& fileName) -> std::optional { - if (fileName == "-" || fileName.empty()) return readAll("", std::cin); - if (std::ifstream stream(fileName); stream) return readAll(fileName, stream); - return std::nullopt; + std::ofstream output(outputFile); + action(output); } -void dumpTokens(const CLI& cli, TranslationUnit& unit, std::ostream& output) { - auto lang = LanguageKind::kCXX; - - if (auto x = cli.getSingle("x")) { - if (x == "c") lang = LanguageKind::kC; - } else if (unit.fileName().ends_with(".c")) { - lang = LanguageKind::kC; +#ifdef CXX_WITH_MLIR +void Frontend::withRawOutputStream( + const std::optional& extension, + const std::function& action) { + auto explicitOutput = cli.getSingle("-o"); + + if (explicitOutput == "-" || (!explicitOutput.has_value() && + (!extension.has_value() || fileName_ == "-"))) { + action(llvm::outs()); + return; } - std::string flags; + auto inputFile = fs::path{fileName_}.filename(); + auto defaultOutputFile = inputFile.replace_extension(*extension); - for (SourceLocation loc(1);; loc = loc.next()) { - const auto& tk = unit.tokenAt(loc); + auto outputFile = cli.getSingle("-o").value_or(defaultOutputFile.string()); - flags.clear(); + std::error_code error_code; + llvm::raw_fd_ostream output(outputFile, error_code); + action(output); +} +#endif - if (tk.startOfLine()) { - flags += " [start-of-line]"; - } +void Frontend::printPreprocessedText() { + if (!cli.opt_E && !cli.opt_Eonly) { + return; + } - if (tk.leadingSpace()) { - flags += " [leading-space]"; - } + if (cli.opt_dM) { + // If we are only dumping macros, we don't need to output the preprocessed + // text. + return; + } - auto kind = tk.kind(); - if (kind == TokenKind::T_IDENTIFIER) { - kind = Lexer::classifyKeyword(tk.spell(), lang); - } + shouldExit_ = true; - output << std::format("{} '{}'{}", Token::name(kind), tk.spell(), flags); + if (cli.opt_Eonly) { + // If we are only preprocessing, we don't need to output the preprocessed + return; + } - auto pos = unit.tokenStartPosition(loc); + withOutputStream(std::nullopt, [&](std::ostream& out) { + unit_->preprocessor()->getPreprocessedText(unit_->tokens(), out); + }); +} - output << std::format(" at {}:{}:{}\n", pos.fileName, pos.line, pos.column); +void Frontend::preprocess() { + auto source = readAll(fileName_); - if (tk.is(TokenKind::T_EOF_SYMBOL)) break; + if (!source.has_value()) { + std::cerr << std::format("cxx: No such file or directory: '{}'\n", + fileName_); + shouldExit_ = true; + exitStatus_ = EXIT_FAILURE; + return; } + + unit_->setSource(std::move(*source), fileName_); } -auto runOnFile(const CLI& cli, const std::string& fileName) -> bool { - VerifyDiagnosticsClient diagnosticsClient; - TranslationUnit unit(&diagnosticsClient); +void Frontend::dumpMacros(std::ostream& out) { + if (!cli.opt_E && !cli.opt_dM) return; + + unit_->preprocessor()->printMacros(out); + + shouldExit_ = true; +} - auto preprocessor = unit.preprocessor(); +void Frontend::prepare() { + auto preprocessor = unit_->preprocessor(); const auto lang = cli.getSingle("-x"); - if (lang == "c" || (!lang.has_value() && fileName.ends_with(".c"))) { + if (lang == "c" || (!lang.has_value() && fileName_.ends_with(".c"))) { // set the language to C preprocessor->setLanguage(LanguageKind::kC); } - std::unique_ptr toolchain; - if (cli.opt_verify) { - diagnosticsClient.setVerify(true); - preprocessor->setCommentHandler(&diagnosticsClient); + diagnosticsClient_->setVerify(true); + preprocessor->setCommentHandler(diagnosticsClient_.get()); } auto toolchainId = cli.getSingle("-toolchain"); @@ -190,7 +219,7 @@ auto runOnFile(const CLI& cli, const std::string& fileName) -> bool { host = "x86_64"; #endif - toolchain = std::make_unique( + toolchain_ = std::make_unique( preprocessor, cli.getSingle("-arch").value_or(host)); } else if (toolchainId == "wasm32") { @@ -218,7 +247,7 @@ auto runOnFile(const CLI& cli, const std::string& fileName) -> bool { wasmToolchain->setSysroot(sysroot_dir.string()); } - toolchain = std::move(wasmToolchain); + toolchain_ = std::move(wasmToolchain); } else if (toolchainId == "linux") { // on linux we default to x86_64, unless the host is aarch64 std::string host = "x86_64"; @@ -227,7 +256,7 @@ auto runOnFile(const CLI& cli, const std::string& fileName) -> bool { host = "aarch64"; #endif - toolchain = std::make_unique( + toolchain_ = std::make_unique( preprocessor, cli.getSingle("-arch").value_or(host)); } else if (toolchainId == "windows") { // on linux we default to x86_64, unless the host is aarch64 @@ -252,30 +281,31 @@ auto runOnFile(const CLI& cli, const std::string& fileName) -> bool { windowsToolchain->setWinsdkversion(versions.back()); } - toolchain = std::move(windowsToolchain); + toolchain_ = std::move(windowsToolchain); } - if (toolchain) { - unit.control()->setMemoryLayout(toolchain->memoryLayout()); + unit_->control()->setMemoryLayout(toolchain_->memoryLayout()); +} - if (!cli.opt_nostdinc) toolchain->addSystemIncludePaths(); +void Frontend::preparePreprocessor() { + auto preprocessor = unit_->preprocessor(); - if (!cli.opt_nostdincpp) toolchain->addSystemCppIncludePaths(); + if (cli.opt_P) { + preprocessor->setOmitLineMarkers(true); + } - toolchain->addPredefinedMacros(); + if (!cli.opt_nostdinc) { + toolchain_->addSystemIncludePaths(); } - for (const auto& path : cli.get("-I")) { - preprocessor->addSystemIncludePath(path); + if (!cli.opt_nostdincpp) { + toolchain_->addSystemCppIncludePaths(); } - if (cli.opt_v) { - std::cerr << std::format("#include <...> search starts here:\n"); - const auto& paths = preprocessor->systemIncludePaths(); - for (auto it = rbegin(paths); it != rend(paths); ++it) { - std::cerr << std::format(" {}\n", *it); - } - std::cerr << std::format("End of search list.\n"); + toolchain_->addPredefinedMacros(); + + for (const auto& path : cli.get("-I")) { + preprocessor->addSystemIncludePath(path); } for (const auto& macro : cli.get("-D")) { @@ -292,20 +322,6 @@ auto runOnFile(const CLI& cli, const std::string& fileName) -> bool { preprocessor->undefMacro(macro); } - auto outputs = cli.get("-o"); - - auto outfile = !outputs.empty() && outputs.back() != "-" - ? std::optional{std::ofstream{outputs.back()}} - : std::nullopt; - - auto& output = outfile ? *outfile : std::cout; - - bool shouldExit = false; - - if (cli.opt_P) { - preprocessor->setOmitLineMarkers(true); - } - if (cli.opt_H && (cli.opt_E || cli.opt_Eonly)) { preprocessor->setOnWillIncludeHeader( [&](const std::string& header, int level) { @@ -313,126 +329,115 @@ auto runOnFile(const CLI& cli, const std::string& fileName) -> bool { std::cout << std::format("{} {}\n", fill, header); }); } +} - if (auto source = readAll(fileName)) { - if (cli.opt_E && !cli.opt_dM) { - std::vector tokens; - preprocessor->preprocess(std::move(*source), fileName, tokens); - preprocessor->getPreprocessedText(tokens, output); - shouldExit = true; - } else { - unit.setSource(std::move(*source), fileName); - if (cli.opt_dM) { - preprocessor->printMacros(output); - shouldExit = true; - } else if (cli.opt_dump_tokens) { - dumpTokens(cli, unit, output); - shouldExit = true; - } else if (cli.opt_Eonly) { - shouldExit = true; - } - } - } else { - std::cerr << std::format("cxx: No such file or directory: '{}'\n", - fileName); - return false; +void Frontend::parse() { + unit_->parse(ParserConfiguration{ + .checkTypes = cli.opt_fcheck || unit_->language() == LanguageKind::kC, + .fuzzyTemplateResolution = true, + .reflect = !cli.opt_fno_reflect, + }); + + if (cli.opt_freport_missing_types) { + (void)checkExpressionTypes(*unit_); } +} - if (!shouldExit) { - unit.parse(ParserConfiguration{ - .checkTypes = cli.opt_fcheck || unit.language() == LanguageKind::kC, - .fuzzyTemplateResolution = true, - .reflect = !cli.opt_fno_reflect, - }); +void Frontend::dumpTokens(std::ostream& out) { + if (!cli.opt_dump_tokens) return; - if (cli.opt_freport_missing_types) { - CheckExpressionTypes checkExpressionTypes; - const auto missingTypes = checkExpressionTypes(&unit); - } + auto dumpTokens = DumpTokens{cli}; + dumpTokens(*unit_, out); - if (cli.opt_dump_symbols && unit.globalScope()) { - dump(std::cout, unit.globalScope()->owner()); - } + shouldExit_ = true; +} - if (cli.opt_emit_ast) { - (void)unit.serialize(output); - } +void Frontend::dumpSymbols(std::ostream& out) { + if (!cli.opt_dump_symbols) return; + auto globalScope = unit_->globalScope(); + auto globalNamespace = globalScope->owner(); + cxx::dump(out, globalNamespace); +} - if (cli.opt_ast_dump) { - ASTPrinter printAST(&unit, std::cout); - printAST(unit.ast()); - } +void Frontend::dumpAst() { + if (!cli.opt_ast_dump) return; + auto printAST = ASTPrinter{unit_.get(), std::cout}; + printAST(unit_->ast()); +} - if (cli.opt_ast_print) { - ASTPrettyPrinter prettyPrinter(&unit, std::cout); - prettyPrinter(unit.ast()); - } +void Frontend::printAstIfNeeded() { + if (!cli.opt_ast_print) return; + auto prettyPrinter = ASTPrettyPrinter{unit_.get(), std::cout}; + prettyPrinter(unit_->ast()); +} -#ifdef CXX_WITH_MLIR - if (cli.opt_emit_ir) { - mlir::MLIRContext context; - context.loadDialect(); +void Frontend::serializeAst() { + if (!cli.opt_emit_ast) return; + auto outputFile = fs::path{fileName_}.filename().replace_extension(".ast"); + std::ofstream out(outputFile.string(), std::ios::binary); + (void)unit_->serialize(out); +} - cxx::Codegen codegen(context, &unit); +void Frontend::showSearchPaths(std::ostream& out) { + if (!cli.opt_v) return; - auto ir = codegen(unit.ast()); + out << std::format("#include <...> search starts here:\n"); - if (failed(lowerToMLIR(ir.module))) { - std::cerr << "cxx: failed to lower C++ AST to MLIR" << std::endl; - return false; - } + const auto& searchPaths = unit_->preprocessor()->systemIncludePaths(); - mlir::OpPrintingFlags flags; - if (cli.opt_g) { - flags.enableDebugInfo(true, false); - } - ir.module->print(llvm::outs(), flags); - } -#endif + for (const auto& path : searchPaths | std::views::reverse) { + out << std::format(" {}\n", path); } - diagnosticsClient.verifyExpectedDiagnostics(); - - return !diagnosticsClient.hasErrors(); + out << std::format("End of search list.\n"); } -} // namespace - -auto main(int argc, char* argv[]) -> int { - using namespace cxx; +void Frontend::emitIR() { + if (!cli.opt_emit_ir) return; - CLI cli; - cli.parse(argc, argv); +#ifdef CXX_WITH_MLIR + mlir::MLIRContext context; + context.loadDialect(); - if (cli.opt_help) { - cli.showHelp(); - exit(0); - } + auto codegen = cxx::Codegen{context, unit_.get()}; - const auto& inputFiles = cli.positionals(); + auto ir = codegen(unit_->ast()); - if (cli.opt_lsp_test) { - cli.opt_lsp = true; + if (failed(lowerToMLIR(ir.module))) { + std::cerr << "cxx: failed to lower C++ AST to MLIR" << std::endl; + shouldExit_ = true; + exitStatus_ = EXIT_FAILURE; + return; } - if (!cli.opt_lsp && inputFiles.empty()) { - std::cerr << "cxx: no input files" << std::endl - << "Usage: cxx [options] file..." << std::endl; - return EXIT_FAILURE; + mlir::OpPrintingFlags flags; + if (cli.opt_g) { + flags.enableDebugInfo(true, false); } - int existStatus = EXIT_SUCCESS; + withRawOutputStream(std::nullopt, [&](llvm::raw_ostream& out) { + ir.module->print(out, flags); + }); - if (cli.opt_lsp) { - lsp::Server server(cli); - existStatus = server.start(); - } else { - for (const auto& fileName : inputFiles) { - if (!runOnFile(cli, fileName)) { - existStatus = EXIT_FAILURE; - } - } - } +#endif +} + +auto Frontend::readAll(const std::string& fileName, std::istream& in) + -> std::optional { + std::string code; + char buffer[4 * 1024]; + do { + in.read(buffer, sizeof(buffer)); + code.append(buffer, in.gcount()); + } while (in); + return code; +} - return existStatus; +auto Frontend::readAll(const std::string& fileName) + -> std::optional { + if (fileName == "-" || fileName.empty()) return readAll("", std::cin); + if (std::ifstream stream(fileName); stream) return readAll(fileName, stream); + return std::nullopt; } + +} // namespace cxx diff --git a/src/frontend/cxx/frontend.h b/src/frontend/cxx/frontend.h new file mode 100644 index 00000000..55b3fb84 --- /dev/null +++ b/src/frontend/cxx/frontend.h @@ -0,0 +1,93 @@ +// Copyright (c) 2025 Roberto Raggi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +#include +#include +#include + +#ifdef CXX_WITH_MLIR +#include +#endif + +namespace cxx { + +class VerifyDiagnosticsClient; +class Toolchain; + +class Frontend { + public: + Frontend(const CLI& cli, std::string fileName); + ~Frontend(); + + [[nodiscard]] auto operator()() -> bool; + + [[nodiscard]] auto translationUnit() const -> TranslationUnit*; + [[nodiscard]] auto toolchain() const -> Toolchain*; + [[nodiscard]] auto fileName() const -> const std::string&; + + void addAction(std::function action); + + private: + void prepare(); + void preparePreprocessor(); + void preprocess(); + void parse(); + void showSearchPaths(std::ostream& out); + void dumpTokens(std::ostream& out); + void dumpSymbols(std::ostream& out); + void serializeAst(); + void dumpAst(); + void printAstIfNeeded(); + void emitIR(); + void printPreprocessedText(); + void dumpMacros(std::ostream& out); + + void withOutputStream(const std::optional& extension, + const std::function& action); + +#ifdef CXX_WITH_MLIR + void withRawOutputStream( + const std::optional& extension, + const std::function& action); +#endif + + [[nodiscard]] auto readAll(const std::string& fileName, std::istream& in) + -> std::optional; + + [[nodiscard]] auto readAll(const std::string& fileName) + -> std::optional; + + private: + const CLI& cli; + std::string fileName_; + std::unique_ptr unit_; + std::unique_ptr diagnosticsClient_; + std::unique_ptr toolchain_; + std::vector> actions_; + bool shouldExit_ = false; + int exitStatus_ = 0; +}; + +} // namespace cxx \ No newline at end of file diff --git a/src/parser/cxx/translation_unit.cc b/src/parser/cxx/translation_unit.cc index 9103ac98..d70c1034 100644 --- a/src/parser/cxx/translation_unit.cc +++ b/src/parser/cxx/translation_unit.cc @@ -170,8 +170,7 @@ void TranslationUnit::parse(ParserConfiguration config) { config_ = std::move(config); - preprocessor_->squeeze(); - Parser parse(this); + auto parse = Parser(this); parse(ast_); } diff --git a/src/parser/cxx/translation_unit.h b/src/parser/cxx/translation_unit.h index 5759ded2..9165d404 100644 --- a/src/parser/cxx/translation_unit.h +++ b/src/parser/cxx/translation_unit.h @@ -94,6 +94,10 @@ class TranslationUnit { return static_cast(tokens_.size()); } + [[nodiscard]] inline auto tokens() const -> const std::vector& { + return tokens_; + } + [[nodiscard]] inline auto tokenAt(SourceLocation loc) const -> const Token& { return tokens_[loc.index()]; }