Skip to content

Commit 03d1917

Browse files
committed
javadoc visitor uses ref parser
#improvement
1 parent fcffff5 commit 03d1917

File tree

1 file changed

+113
-219
lines changed

1 file changed

+113
-219
lines changed

src/lib/AST/ParseJavadoc.cpp

Lines changed: 113 additions & 219 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <clang/AST/RawCommentList.h>
2424
#include <clang/Lex/Lexer.h>
2525
#include <clang/Basic/SourceLocation.h>
26+
#include "lib/AST/ParseRef.hpp"
27+
2628
#ifdef _MSC_VER
2729
#pragma warning(push)
2830
#pragma warning(disable: 5054) // C5054: operator '+': deprecated between enumerations of different types
@@ -1072,6 +1074,21 @@ std::string
10721074
JavadocVisitor::
10731075
fixReference(std::string& ref)
10741076
{
1077+
auto peekNextIt = [&]() -> std::optional<std::string_view>
1078+
{
1079+
++it_;
1080+
if (it_ == end_ ||
1081+
(*it_)->getCommentKind() != CommentKind::TextComment)
1082+
{
1083+
--it_;
1084+
return std::nullopt;
1085+
}
1086+
Comment const* c = *it_;
1087+
std::string_view text = static_cast<TextComment const*>(c)->getText();
1088+
--it_;
1089+
return text;
1090+
};
1091+
10751092
// If the ref is only "operator", the next text comment
10761093
// might contain a simple operator name/type, or a
10771094
// full operator overload.
@@ -1080,249 +1097,126 @@ fixReference(std::string& ref)
10801097
// we find an unbalanced '('.
10811098
// Simply including the next text comment is enough
10821099
// for the next step.
1083-
std::string_view trimmed = trim(ref);
1084-
bool const isNoSuffixOperator =
1085-
trimmed == "operator" ||
1086-
trimmed.ends_with("::operator");
1087-
if (isNoSuffixOperator)
1088-
{
1089-
++it_;
1090-
if (it_ == end_)
1091-
{
1092-
return ref;
1093-
}
1094-
Comment const* c = *it_;
1095-
if (c->getCommentKind() == CommentKind::TextComment)
1096-
{
1097-
ref += static_cast<TextComment const*>(c)->getText();
1098-
}
1099-
else
1100+
ParsedRef v;
1101+
while (true)
1102+
{
1103+
// Attempt to parse ref
1104+
char const* first = ref.data();
1105+
char const* last = first + ref.size();
1106+
auto const pres = parse(first, last, v);
1107+
if (!pres)
11001108
{
1101-
return ref;
1102-
}
1103-
}
1104-
static constexpr std::string_view idChars =
1105-
"abcdefghijklmnopqrstuvwxyz"
1106-
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1107-
"0123456789"
1108-
"_:";
1109-
bool const isNoFunctionOperator =
1110-
isNoSuffixOperator ||
1111-
[trimmed]{
1112-
if (contains_n(trimmed, '(', 1))
1113-
{
1114-
return false;
1115-
}
1116-
std::size_t pos = trimmed.rfind("::");
1117-
std::string_view last = trimmed;
1118-
if (pos != std::string::npos) {
1119-
last = trimmed.substr(pos + 2);
1120-
}
1121-
if (!last.starts_with("operator"))
1122-
{
1123-
return false;
1124-
}
1125-
last.remove_prefix(8);
1126-
if (last.empty())
1109+
// The ref could not be parsed, so add content from the next
1110+
// text comment to the ref
1111+
auto const nextTextOpt = peekNextIt();
1112+
if (!nextTextOpt)
11271113
{
1128-
return true;
1114+
return {};
11291115
}
1130-
return !contains(idChars, last.front());
1131-
}();
1132-
1133-
// Clang parses the copydoc command breaking
1134-
// before the complete overload information. For instance,
1135-
// `@copydoc operator()(unsigned char) const` will create
1136-
// a node with the text `operator()(unsigned` and another
1137-
// with `char) const`. We need to merge these nodes.
1138-
// If the ref contains an unbalanced '(', then it's
1139-
// a function, and we need to merge the next text comments
1140-
// until we find a balanced ')'.
1141-
bool const isFunction = contains(ref, '(');
1142-
if (isFunction)
1143-
{
1144-
while (std::ranges::count(ref, '(') != std::ranges::count(ref, ')'))
1145-
{
1116+
ref += *nextTextOpt;
11461117
++it_;
1147-
if (it_ == end_)
1148-
{
1149-
break;
1150-
}
1151-
Comment const* c = *it_;
1152-
if (c->getCommentKind() == CommentKind::TextComment)
1153-
{
1154-
ref += static_cast<TextComment const*>(c)->getText();
1155-
}
1156-
else
1118+
continue;
1119+
}
1120+
1121+
// The ref was parsed successfully
1122+
if (pres.ptr != last)
1123+
{
1124+
// The parser didn't consume all the text, so we need to
1125+
// remove the leftover text from the ref and return it
1126+
auto leftover = std::string(pres.ptr, last - pres.ptr);
1127+
// If leftover is only whitespace, the ref might need
1128+
// the next text comment to complete it.
1129+
if (!isWhitespace(leftover))
11571130
{
1158-
break;
1131+
ref.erase(pres.ptr - first);
1132+
return leftover;
11591133
}
11601134
}
1161-
if (rtrim(ref).ends_with(')'))
1135+
1136+
// The ref is fully parsed, but we might want to
1137+
// include the next text comment if it contains
1138+
// a valid continuation to the ref.
1139+
bool const mightHaveMoreQualifiers =
1140+
v.HasFunctionParameters &&
1141+
v.ExceptionSpec.Implicit &&
1142+
v.ExceptionSpec.Operand.empty();
1143+
if (mightHaveMoreQualifiers)
11621144
{
1163-
static constexpr std::array<std::string_view, 5> qualifiers = {
1164-
"const",
1165-
"volatile",
1166-
"noexcept",
1167-
"&&",
1168-
"&",
1169-
};
1170-
auto isQualifiersOnly = [](std::string_view str)
1145+
llvm::SmallVector<std::string_view, 4> potentialQualifiers;
1146+
if (v.Kind == ReferenceKind::None)
11711147
{
1172-
// Iterate all words between spaces and check if
1173-
// they are qualifiers
1174-
std::size_t pos = 0;
1175-
while (pos < str.size())
1148+
// "&&" or "&" not defined yet
1149+
if (!v.IsConst)
11761150
{
1177-
std::size_t const start = str.find_first_not_of(' ', pos);
1178-
if (start == std::string::npos)
1179-
{
1180-
break;
1181-
}
1182-
std::size_t const end = str.find_first_of(' ', start);
1183-
std::string_view word = str.substr(start, end - start);
1184-
if (std::ranges::find(qualifiers, word) == qualifiers.end())
1185-
{
1186-
return false;
1187-
}
1188-
pos = end;
1151+
potentialQualifiers.push_back("const");
11891152
}
1190-
return true;
1191-
};
1192-
auto isWhitespaceOnly = [](std::string_view str)
1193-
{
1194-
return str.empty() || str.find_first_not_of(' ') == std::string::npos;
1195-
};
1196-
1197-
// peek next comment
1198-
std::string functionContinuation;
1199-
auto originalIt = it_;
1200-
++it_;
1201-
while (
1202-
it_ != end_ &&
1203-
(isWhitespaceOnly(functionContinuation) ||
1204-
isQualifiersOnly(functionContinuation)))
1205-
{
1206-
Comment const* c = *it_;
1207-
if (c->getCommentKind() != CommentKind::TextComment)
1153+
if (!v.IsVolatile)
12081154
{
1209-
break;
1155+
potentialQualifiers.push_back("volatile");
12101156
}
1211-
functionContinuation += static_cast<TextComment const*>(c)->getText();
1212-
++it_;
1157+
potentialQualifiers.push_back("&");
12131158
}
1214-
if (isWhitespaceOnly(functionContinuation))
1159+
else if (
1160+
v.Kind == ReferenceKind::LValue &&
1161+
ref.ends_with('&'))
12151162
{
1216-
it_ = originalIt;
1163+
// The second "&" might be in the next Text block
1164+
potentialQualifiers.push_back("&");
12171165
}
1218-
else /* if (!functionContinuation.empty()) */
1166+
potentialQualifiers.push_back("noexcept");
1167+
auto const nextTextOpt = peekNextIt();
1168+
if (!nextTextOpt)
12191169
{
1220-
--it_;
1221-
std::string_view suffix = functionContinuation;
1222-
std::string_view leftover = functionContinuation;
1223-
bool foundAny = false;
1224-
std::size_t totalRemoved = 0;
1225-
while (!suffix.empty())
1226-
{
1227-
bool found = false;
1228-
std::size_t const initialWhitespace = std::min(
1229-
suffix.find_first_not_of(" "), suffix.size());
1230-
for (auto const& q : qualifiers)
1231-
{
1232-
if (suffix.substr(initialWhitespace).starts_with(q))
1233-
{
1234-
std::size_t const toRemove = initialWhitespace + q.size();
1235-
if (
1236-
contains(idChars, q.back()) &&
1237-
suffix.size() > toRemove &&
1238-
contains(idChars, suffix[toRemove]))
1239-
{
1240-
// This is not a qualifier, but part of
1241-
// an identifier
1242-
continue;
1243-
}
1244-
suffix.remove_prefix(toRemove);
1245-
totalRemoved += toRemove;
1246-
found = true;
1247-
foundAny = true;
1248-
break;
1249-
}
1250-
}
1251-
if (!found)
1170+
auto leftover = std::string(pres.ptr, last - pres.ptr);
1171+
ref.erase(pres.ptr - first);
1172+
return leftover;
1173+
}
1174+
std::string_view const nextText = *nextTextOpt;
1175+
std::string_view const trimmed = ltrim(nextText);
1176+
if (trimmed.empty() ||
1177+
std::ranges::any_of(
1178+
potentialQualifiers,
1179+
[&](std::string_view s)
12521180
{
1253-
break;
1254-
}
1255-
}
1256-
if (foundAny)
1257-
{
1258-
leftover = leftover.substr(0, totalRemoved);
1259-
ref += leftover;
1260-
return std::string(suffix);
1261-
}
1181+
return trimmed.starts_with(s);
1182+
}))
1183+
{
1184+
ref += nextText;
1185+
++it_;
1186+
continue;
12621187
}
12631188
}
1264-
}
12651189

1266-
1267-
// Clang refs can also contain invalid characters
1268-
// at the end, especially punctuation. We need to
1269-
// truncate the ref at the last valid identifier
1270-
// character.
1271-
// The last identifier character depends on the type
1272-
// of ref.
1273-
// - If it's an operator but not a function, then
1274-
// we also consider operator chars as valid.
1275-
// - If it's a function, then we also consider ')'
1276-
// as valid.
1277-
// - In all cases, we consider the identifier chars
1278-
// as valid.
1279-
static constexpr std::string_view operatorChars =
1280-
"~!%^&*()-+=|[]{};:,.<>?/";
1281-
static constexpr std::string_view parenChars =
1282-
"()";
1283-
std::string leftover;
1284-
bool const isRegularIdentifier = !isFunction && !isNoFunctionOperator;
1285-
if (isRegularIdentifier)
1286-
{
1287-
auto const lastIdChar = ref.find_last_of(idChars);
1288-
auto const firstLeftoverChar = lastIdChar + 1;
1289-
if (firstLeftoverChar < ref.size())
1290-
{
1291-
leftover = std::string_view(ref).substr(lastIdChar + 1);
1292-
ref = ref.substr(0, lastIdChar + 1);
1293-
}
1294-
}
1295-
else if (isFunction)
1296-
{
1297-
auto reservedCharsets = {idChars, parenChars};
1298-
auto reservedChars = std::views::join(reservedCharsets);
1299-
auto const lastIdOrParen = find_last_of(ref, reservedChars);
1300-
auto const firstLeftoverChar =
1301-
lastIdOrParen == ref.end() ?
1302-
ref.end() :
1303-
std::next(lastIdOrParen);
1304-
if (firstLeftoverChar != ref.end())
1190+
// The ref might have more components
1191+
bool const mightHaveMoreComponents =
1192+
!v.HasFunctionParameters;
1193+
if (mightHaveMoreComponents)
13051194
{
1306-
leftover = std::string_view(firstLeftoverChar, ref.end());
1307-
ref = ref.substr(0, std::distance(ref.begin(), firstLeftoverChar));
1308-
}
1309-
}
1310-
else /* if (isNoFunctionOperator) */
1311-
{
1312-
auto reservedCharsets = {idChars, operatorChars};
1313-
auto reservedChars = std::views::join(reservedCharsets);
1314-
auto const lastIdOrOperator = find_last_of(ref, reservedChars);
1315-
auto const firstLeftoverChar =
1316-
lastIdOrOperator == ref.end() ?
1317-
ref.end() :
1318-
std::next(lastIdOrOperator);
1319-
if (firstLeftoverChar != ref.end())
1320-
{
1321-
leftover = std::string_view(firstLeftoverChar, ref.end());
1322-
ref = ref.substr(0, std::distance(ref.begin(), firstLeftoverChar));
1195+
auto const nextTextOpt = peekNextIt();
1196+
if (!nextTextOpt)
1197+
{
1198+
auto leftover = std::string(pres.ptr, last - pres.ptr);
1199+
ref.erase(pres.ptr - first);
1200+
return leftover;
1201+
}
1202+
std::string_view const nextText = *nextTextOpt;
1203+
std::string_view const trimmed = ltrim(nextText);
1204+
static constexpr std::string_view idChars
1205+
= "abcdefghijklmnopqrstuvwxyz"
1206+
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1207+
"0123456789"
1208+
"_:";
1209+
if (trimmed.empty() ||
1210+
contains(idChars, trimmed.front()))
1211+
{
1212+
ref += nextText;
1213+
++it_;
1214+
continue;
1215+
}
13231216
}
1217+
1218+
return {};
13241219
}
1325-
return leftover;
13261220
}
13271221

13281222
//------------------------------------------------

0 commit comments

Comments
 (0)