Skip to content

Commit 2e61695

Browse files
committed
javadoc parser extracts ref qualifiers
#feat
1 parent b76c8ba commit 2e61695

File tree

5 files changed

+1273
-12
lines changed

5 files changed

+1273
-12
lines changed

src/lib/AST/ParseJavadoc.cpp

Lines changed: 118 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,6 @@ visitInlineCommandComment(
988988
{
989989
MRDOCS_CHECK_OR(goodArgCount(1, *C));
990990
std::string ref = C->getArgText(0).str();
991-
std::string originalRef = ref;
992991
std::string leftOver = fixReference(ref);
993992
bool const hasExtra = !leftOver.empty();
994993
emplaceText<doc::Copied>(
@@ -1140,24 +1139,131 @@ fixReference(std::string& ref)
11401139
// a function, and we need to merge the next text comments
11411140
// until we find a balanced ')'.
11421141
bool const isFunction = contains(ref, '(');
1143-
while (std::ranges::count(ref, '(') != std::ranges::count(ref, ')'))
1142+
if (isFunction)
11441143
{
1145-
++it_;
1146-
if (it_ == end_)
1147-
{
1148-
break;
1149-
}
1150-
Comment const* c = *it_;
1151-
if (c->getCommentKind() == CommentKind::TextComment)
1144+
while (std::ranges::count(ref, '(') != std::ranges::count(ref, ')'))
11521145
{
1153-
ref += static_cast<TextComment const*>(c)->getText();
1146+
++it_;
1147+
if (it_ == end_)
1148+
{
1149+
break;
1150+
}
1151+
Comment const* c = *it_;
1152+
if (c->getCommentKind() == CommentKind::TextComment)
1153+
{
1154+
ref += static_cast<TextComment const*>(c)->getText();
1155+
}
1156+
else
1157+
{
1158+
break;
1159+
}
11541160
}
1155-
else
1161+
if (rtrim(ref).ends_with(')'))
11561162
{
1157-
break;
1163+
static constexpr std::array<std::string_view, 5> qualifiers = {
1164+
"const",
1165+
"volatile",
1166+
"noexcept",
1167+
"&&",
1168+
"&",
1169+
};
1170+
auto isQualifiersOnly = [](std::string_view str)
1171+
{
1172+
// Iterate all words between spaces and check if
1173+
// they are qualifiers
1174+
std::size_t pos = 0;
1175+
while (pos < str.size())
1176+
{
1177+
std::size_t const start = str.find_first_not_of(' ', pos);
1178+
if (start == std::string::npos)
1179+
{
1180+
break;
1181+
}
1182+
std::size_t const end = str.find_first_of(' ', start);
1183+
std::string_view word = str.substr(start, end - start);
1184+
if (std::ranges::find(qualifiers, word) == qualifiers.end())
1185+
{
1186+
return false;
1187+
}
1188+
pos = end;
1189+
}
1190+
return true;
1191+
};
1192+
auto isWhitespaceOnly = [](std::string_view str)
1193+
{
1194+
return str.empty() || str.find_first_not_of(' ') == std::string::npos;
1195+
};
1196+
1197+
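// Peek at the following text comments, accumulating their text while it is only whitespace or trailing function qualifiers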
1198+
std::string functionContinuation;
1199+
auto originalIt = it_;
1200+
++it_;
1201+
while (
1202+
it_ != end_ &&
1203+
(isWhitespaceOnly(functionContinuation) ||
1204+
isQualifiersOnly(functionContinuation)))
1205+
{
1206+
Comment const* c = *it_;
1207+
if (c->getCommentKind() != CommentKind::TextComment)
1208+
{
1209+
break;
1210+
}
1211+
functionContinuation += static_cast<TextComment const*>(c)->getText();
1212+
++it_;
1213+
}
1214+
if (isWhitespaceOnly(functionContinuation))
1215+
{
1216+
it_ = originalIt;
1217+
}
1218+
else /* if (!functionContinuation.empty()) */
1219+
{
1220+
--it_;
1221+
std::string_view suffix = functionContinuation;
1222+
std::string_view leftover = functionContinuation;
1223+
bool foundAny = false;
1224+
std::size_t totalRemoved = 0;
1225+
while (!suffix.empty())
1226+
{
1227+
bool found = false;
1228+
std::size_t const initialWhitespace = std::min(
1229+
suffix.find_first_not_of(" "), suffix.size());
1230+
for (auto const& q : qualifiers)
1231+
{
1232+
if (suffix.substr(initialWhitespace).starts_with(q))
1233+
{
1234+
std::size_t const toRemove = initialWhitespace + q.size();
1235+
if (
1236+
contains(idChars, q.back()) &&
1237+
suffix.size() > toRemove &&
1238+
contains(idChars, suffix[toRemove]))
1239+
{
1240+
// This is not a qualifier, but part of
1241+
// an identifier
1242+
continue;
1243+
}
1244+
suffix.remove_prefix(toRemove);
1245+
totalRemoved += toRemove;
1246+
found = true;
1247+
foundAny = true;
1248+
break;
1249+
}
1250+
}
1251+
if (!found)
1252+
{
1253+
break;
1254+
}
1255+
}
1256+
if (foundAny)
1257+
{
1258+
leftover = leftover.substr(0, totalRemoved);
1259+
ref += leftover;
1260+
return std::string(suffix);
1261+
}
1262+
}
11581263
}
11591264
}
11601265

1266+
11611267
// Clang refs can also contain invalid characters
11621268
// at the end, especially punctuation. We need to
11631269
// truncate the ref at the last valid identifier

0 commit comments

Comments
 (0)