Skip to content

[clang-format] Add functionality of getting info about numeric literals #152878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 11, 2025

Conversation

owenca
Copy link
Contributor

@owenca owenca commented Aug 9, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 9, 2025

@llvm/pr-subscribers-clang-format

Author: owenca (owenca)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/152878.diff

5 Files Affected:

  • (modified) clang/lib/Format/CMakeLists.txt (+1)
  • (added) clang/lib/Format/NumericLiteralInfo.cpp (+67)
  • (added) clang/lib/Format/NumericLiteralInfo.h (+41)
  • (modified) clang/unittests/Format/CMakeLists.txt (+1)
  • (added) clang/unittests/Format/NumericLiteralInfoTest.cpp (+64)
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 9f4939824fdb8..24f435d2caee1 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -13,6 +13,7 @@ add_clang_library(clangFormat
   MacroExpander.cpp
   MatchFilePath.cpp
   NamespaceEndCommentsFixer.cpp
+  NumericLiteralInfo.cpp
   ObjCPropertyAttributeOrderFixer.cpp
   QualifierAlignmentFixer.cpp
   SortJavaScriptImports.cpp
diff --git a/clang/lib/Format/NumericLiteralInfo.cpp b/clang/lib/Format/NumericLiteralInfo.cpp
new file mode 100644
index 0000000000000..2c7a7601ed633
--- /dev/null
+++ b/clang/lib/Format/NumericLiteralInfo.cpp
@@ -0,0 +1,67 @@
+//===--- NumericLiteralInfo.cpp ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the functionality of getting information about a
+/// numeric literal string, including 0-based positions of the base letter, the
+/// decimal/hexadecimal point, the exponent letter, and the suffix, or npos if
+/// absent.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NumericLiteralInfo.h"
+#include "llvm/ADT/StringExtras.h"
+
+namespace clang {
+namespace format {
+
+using namespace llvm;
+
+constexpr auto npos = StringRef::npos; // Sentinel stored for an absent part.
+
+NumericLiteralInfo::NumericLiteralInfo(StringRef Text, char Separator) {
+  assert(Text.size() > 1); // Text[1] is read below.
+
+  bool IsHex = false;
+  BaseLetterPos = npos;
+  if (Text[0] == '0') {
+    switch (Text[1]) {
+    case 'x':
+    case 'X':
+      IsHex = true;
+      [[fallthrough]];
+    case 'b':
+    case 'B':
+    case 'o':
+    case 'O':
+      BaseLetterPos = 1; // e.g. 0xF
+      break;
+    }
+  }
+
+  DotPos = Text.find('.', BaseLetterPos == 1 ? 2 : 0); // e.g. 0x.1 or .1
+
+  // Search start: past the point (1.e2) or past prefix + 1 digit (0xFp2).
+  const auto Pos = DotPos != npos ? DotPos + 1 : BaseLetterPos + 2; // npos + 2 wraps (to 1 if npos is the max size_t).
+
+  ExponentLetterPos =
+      // Trim C++ user-defined suffix as in `1_Pa`, so its letters are not searched.
+      (Separator == '\'' ? Text.substr(0, Text.find('_')) : Text)
+          .find_insensitive(IsHex ? 'p' : 'e', Pos);
+
+  const bool HasExponent = ExponentLetterPos != npos;
+  SuffixPos = Text.find_if_not(
+      [&](char C) { // True while C is a digit or separator, i.e. not yet suffix.
+        return (HasExponent || !IsHex ? isDigit : isHexDigit)(C) ||
+               C == Separator;
+      },
+      HasExponent ? ExponentLetterPos + 2 : Pos); // e.g. 1e-2f; + 2 skips the letter and the next char.
+}
+
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/NumericLiteralInfo.h b/clang/lib/Format/NumericLiteralInfo.h
new file mode 100644
index 0000000000000..8ed5e87e2f410
--- /dev/null
+++ b/clang/lib/Format/NumericLiteralInfo.h
@@ -0,0 +1,41 @@
+//===--- NumericLiteralInfo.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_NUMERICLITERALINFO_H
+#define LLVM_CLANG_LIB_FORMAT_NUMERICLITERALINFO_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+extern const size_t npos; // Defined in NumericLiteralInfo.cpp.
+
+struct NumericLiteralInfo { // 0-based positions of parts of a numeric literal.
+  size_t BaseLetterPos;     // Position of the base letter, or npos if absent.
+  size_t DotPos;            // Position of the decimal/hexadecimal point, or npos.
+  size_t ExponentLetterPos; // Position of the exponent letter, or npos.
+  size_t SuffixPos;         // Starting position of the suffix, or npos.
+
+  NumericLiteralInfo(size_t BaseLetterPos = npos, size_t DotPos = npos, // Sets the fields directly.
+                     size_t ExponentLetterPos = npos, size_t SuffixPos = npos)
+      : BaseLetterPos(BaseLetterPos), DotPos(DotPos),
+        ExponentLetterPos(ExponentLetterPos), SuffixPos(SuffixPos) {}
+
+  NumericLiteralInfo(llvm::StringRef Text, char Separator); // Computes the fields from Text; Separator is the digit separator.
+
+  bool operator==(const NumericLiteralInfo &R) const { // Equal iff all four positions match.
+    return BaseLetterPos == R.BaseLetterPos && DotPos == R.DotPos &&
+           ExponentLetterPos == R.ExponentLetterPos && SuffixPos == R.SuffixPos;
+  }
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index edfc8d7a5beaa..c4c7b483ba68e 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -27,6 +27,7 @@ add_distinct_clang_unittest(FormatTests
   MacroExpanderTest.cpp
   MatchFilePathTest.cpp
   NamespaceEndCommentsFixerTest.cpp
+  NumericLiteralInfoTest.cpp
   ObjCPropertyAttributeOrderFixerTest.cpp
   QualifierFixerTest.cpp
   SortImportsTestJS.cpp
diff --git a/clang/unittests/Format/NumericLiteralInfoTest.cpp b/clang/unittests/Format/NumericLiteralInfoTest.cpp
new file mode 100644
index 0000000000000..6597cf3772de5
--- /dev/null
+++ b/clang/unittests/Format/NumericLiteralInfoTest.cpp
@@ -0,0 +1,64 @@
+//===- unittest/Format/NumericLiteralInfoTest.cpp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/Format/NumericLiteralInfo.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+class NumericLiteralInfoTest : public testing::Test { // Fixture for the tests below.
+protected:
+  NumericLiteralInfo getInfo(llvm::StringRef Text, char Separator = '\'') {
+    return NumericLiteralInfo(Text, Separator); // Separator defaults to '\''.
+  }
+};
+
+TEST_F(NumericLiteralInfoTest, IntegerLiteral) {
+  // Decimal: plain, with a suffix, and with a digit separator before a suffix.
+  EXPECT_EQ(getInfo("90"), NumericLiteralInfo());
+  EXPECT_EQ(getInfo("9L"), NumericLiteralInfo(npos, npos, npos, 1));
+  EXPECT_EQ(getInfo("9'0U"), NumericLiteralInfo(npos, npos, npos, 3));
+
+  // Octal: a leading 0 alone is not a base letter; in "0z" the suffix starts at 'z'.
+  EXPECT_EQ(getInfo("07"), NumericLiteralInfo());
+  EXPECT_EQ(getInfo("0z"), NumericLiteralInfo(npos, npos, npos, 1));
+  // JavaScript 0o/0O prefix; the second case passes '_' as the separator.
+  EXPECT_EQ(getInfo("0o7"), NumericLiteralInfo(1));
+  EXPECT_EQ(getInfo("0O7_0", '_'), NumericLiteralInfo(1));
+
+  // Binary, without and with a suffix.
+  EXPECT_EQ(getInfo("0b1"), NumericLiteralInfo(1));
+  EXPECT_EQ(getInfo("0B1ul"), NumericLiteralInfo(1, npos, npos, 3));
+
+  // Hexadecimal: 'Z' is not a hex digit, so the suffix starts there.
+  EXPECT_EQ(getInfo("0xF"), NumericLiteralInfo(1));
+  EXPECT_EQ(getInfo("0XfZ"), NumericLiteralInfo(1, npos, npos, 3));
+}
+
+TEST_F(NumericLiteralInfoTest, FloatingPointLiteral) {
+  // Decimal: leading/trailing point, exponent with and without sign, suffixes.
+  EXPECT_EQ(getInfo(".9"), NumericLiteralInfo(npos, 0));
+  EXPECT_EQ(getInfo("9."), NumericLiteralInfo(npos, 1));
+  EXPECT_EQ(getInfo("9.F"), NumericLiteralInfo(npos, 1, npos, 2));
+  EXPECT_EQ(getInfo("9e9"), NumericLiteralInfo(npos, npos, 1));
+  EXPECT_EQ(getInfo("9E-9f"), NumericLiteralInfo(npos, npos, 1, 4));
+  EXPECT_EQ(getInfo("9.9e+9bf16"), NumericLiteralInfo(npos, 1, 3, 6));
+
+  // Hexadecimal: exponent letter is p/P; the last case has a "_Pa" suffix.
+  EXPECT_EQ(getInfo("0X.Fp9"), NumericLiteralInfo(1, 2, 4));
+  EXPECT_EQ(getInfo("0xF.P9"), NumericLiteralInfo(1, 3, 4));
+  EXPECT_EQ(getInfo("0xFp9"), NumericLiteralInfo(1, npos, 3));
+  EXPECT_EQ(getInfo("0xFp+9F128"), NumericLiteralInfo(1, npos, 3, 6));
+  EXPECT_EQ(getInfo("0xF.Fp-9_Pa"), NumericLiteralInfo(1, 3, 5, 8));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang

@owenca
Copy link
Contributor Author

owenca commented Aug 9, 2025

This is useful for #131510 and #151590.

@owenca owenca merged commit 3d38a92 into llvm:main Aug 11, 2025
9 checks passed
@owenca owenca deleted the numeric-literal branch August 11, 2025 16:24
@llvm llvm deleted a comment from llvm-ci Aug 12, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants