23
23
#include < clang/AST/RawCommentList.h>
24
24
#include < clang/Lex/Lexer.h>
25
25
#include < clang/Basic/SourceLocation.h>
26
+ #include " lib/AST/ParseRef.hpp"
27
+
26
28
#ifdef _MSC_VER
27
29
#pragma warning(push)
28
30
#pragma warning(disable: 5054) // C5054: operator '+': deprecated between enumerations of different types
@@ -1072,6 +1074,21 @@ std::string
1072
1074
JavadocVisitor::
1073
1075
fixReference (std::string& ref)
1074
1076
{
1077
+ auto peekNextIt = [&]() -> std::optional<std::string_view>
1078
+ {
1079
+ ++it_;
1080
+ if (it_ == end_ ||
1081
+ (*it_)->getCommentKind () != CommentKind::TextComment)
1082
+ {
1083
+ --it_;
1084
+ return std::nullopt;
1085
+ }
1086
+ Comment const * c = *it_;
1087
+ std::string_view text = static_cast <TextComment const *>(c)->getText ();
1088
+ --it_;
1089
+ return text;
1090
+ };
1091
+
1075
1092
// If the ref is only "operator", the next text comment
1076
1093
// might contain a simple operator name/type, or a
1077
1094
// full operator overload.
@@ -1080,249 +1097,126 @@ fixReference(std::string& ref)
1080
1097
// we find an unbalanced '('.
1081
1098
// Simply including the next text comment is enough
1082
1099
// for the next step.
1083
- std::string_view trimmed = trim (ref);
1084
- bool const isNoSuffixOperator =
1085
- trimmed == " operator" ||
1086
- trimmed.ends_with (" ::operator" );
1087
- if (isNoSuffixOperator)
1088
- {
1089
- ++it_;
1090
- if (it_ == end_)
1091
- {
1092
- return ref;
1093
- }
1094
- Comment const * c = *it_;
1095
- if (c->getCommentKind () == CommentKind::TextComment)
1096
- {
1097
- ref += static_cast <TextComment const *>(c)->getText ();
1098
- }
1099
- else
1100
+ ParsedRef v;
1101
+ while (true )
1102
+ {
1103
+ // Attempt to parse ref
1104
+ char const * first = ref.data ();
1105
+ char const * last = first + ref.size ();
1106
+ auto const pres = parse (first, last, v);
1107
+ if (!pres)
1100
1108
{
1101
- return ref;
1102
- }
1103
- }
1104
- static constexpr std::string_view idChars =
1105
- " abcdefghijklmnopqrstuvwxyz"
1106
- " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1107
- " 0123456789"
1108
- " _:" ;
1109
- bool const isNoFunctionOperator =
1110
- isNoSuffixOperator ||
1111
- [trimmed]{
1112
- if (contains_n (trimmed, ' (' , 1 ))
1113
- {
1114
- return false ;
1115
- }
1116
- std::size_t pos = trimmed.rfind (" ::" );
1117
- std::string_view last = trimmed;
1118
- if (pos != std::string::npos) {
1119
- last = trimmed.substr (pos + 2 );
1120
- }
1121
- if (!last.starts_with (" operator" ))
1122
- {
1123
- return false ;
1124
- }
1125
- last.remove_prefix (8 );
1126
- if (last.empty ())
1109
+ // The ref could not be parsed, add content from next
1110
+ // text comment to the ref
1111
+ auto const nextTextOpt = peekNextIt ();
1112
+ if (!nextTextOpt)
1127
1113
{
1128
- return true ;
1114
+ return {} ;
1129
1115
}
1130
- return !contains (idChars, last.front ());
1131
- }();
1132
-
1133
- // Clang parses the copydoc command breaking
1134
- // before the complete overload information. For instance,
1135
- // `@copydoc operator()(unsigned char) const` will create
1136
- // a node with the text `operator()(unsigned` and another
1137
- // with `char) const`. We need to merge these nodes.
1138
- // If the ref contains an unbalanced '(', then it's
1139
- // a function, and we need to merge the next text comments
1140
- // until we find a balanced ')'.
1141
- bool const isFunction = contains (ref, ' (' );
1142
- if (isFunction)
1143
- {
1144
- while (std::ranges::count (ref, ' (' ) != std::ranges::count (ref, ' )' ))
1145
- {
1116
+ ref += *nextTextOpt;
1146
1117
++it_;
1147
- if (it_ == end_)
1148
- {
1149
- break ;
1150
- }
1151
- Comment const * c = *it_;
1152
- if (c->getCommentKind () == CommentKind::TextComment)
1153
- {
1154
- ref += static_cast <TextComment const *>(c)->getText ();
1155
- }
1156
- else
1118
+ continue ;
1119
+ }
1120
+
1121
+ // The ref is fully parsed
1122
+ if (pres.ptr != last)
1123
+ {
1124
+ // The ref didn't consume all the text, so we need to
1125
+ // remove the leftover text from the ref and return it
1126
+ auto leftover = std::string (pres.ptr , last - pres.ptr );
1127
+ // If leftover is only whitespace, the ref might need
1128
+ // the next text comment to complete it.
1129
+ if (!isWhitespace (leftover))
1157
1130
{
1158
- break ;
1131
+ ref.erase (pres.ptr - first);
1132
+ return leftover;
1159
1133
}
1160
1134
}
1161
- if (rtrim (ref).ends_with (' )' ))
1135
+
1136
+ // The ref is fully parsed, but we might want to
1137
+ // include the next text comment if it contains
1138
+ // a valid continuation to the ref.
1139
+ bool const mightHaveMoreQualifiers =
1140
+ v.HasFunctionParameters &&
1141
+ v.ExceptionSpec .Implicit &&
1142
+ v.ExceptionSpec .Operand .empty ();
1143
+ if (mightHaveMoreQualifiers)
1162
1144
{
1163
- static constexpr std::array<std::string_view, 5 > qualifiers = {
1164
- " const" ,
1165
- " volatile" ,
1166
- " noexcept" ,
1167
- " &&" ,
1168
- " &" ,
1169
- };
1170
- auto isQualifiersOnly = [](std::string_view str)
1145
+ llvm::SmallVector<std::string_view, 4 > potentialQualifiers;
1146
+ if (v.Kind == ReferenceKind::None)
1171
1147
{
1172
- // Iterate all words between spaces and check if
1173
- // they are qualifiers
1174
- std::size_t pos = 0 ;
1175
- while (pos < str.size ())
1148
+ // "&&" or "&" not defined yet
1149
+ if (!v.IsConst )
1176
1150
{
1177
- std::size_t const start = str.find_first_not_of (' ' , pos);
1178
- if (start == std::string::npos)
1179
- {
1180
- break ;
1181
- }
1182
- std::size_t const end = str.find_first_of (' ' , start);
1183
- std::string_view word = str.substr (start, end - start);
1184
- if (std::ranges::find (qualifiers, word) == qualifiers.end ())
1185
- {
1186
- return false ;
1187
- }
1188
- pos = end;
1151
+ potentialQualifiers.push_back (" const" );
1189
1152
}
1190
- return true ;
1191
- };
1192
- auto isWhitespaceOnly = [](std::string_view str)
1193
- {
1194
- return str.empty () || str.find_first_not_of (' ' ) == std::string::npos;
1195
- };
1196
-
1197
- // peek next comment
1198
- std::string functionContinuation;
1199
- auto originalIt = it_;
1200
- ++it_;
1201
- while (
1202
- it_ != end_ &&
1203
- (isWhitespaceOnly (functionContinuation) ||
1204
- isQualifiersOnly (functionContinuation)))
1205
- {
1206
- Comment const * c = *it_;
1207
- if (c->getCommentKind () != CommentKind::TextComment)
1153
+ if (!v.IsVolatile )
1208
1154
{
1209
- break ;
1155
+ potentialQualifiers. push_back ( " volatile " ) ;
1210
1156
}
1211
- functionContinuation += static_cast <TextComment const *>(c)->getText ();
1212
- ++it_;
1157
+ potentialQualifiers.push_back (" &" );
1213
1158
}
1214
- if (isWhitespaceOnly (functionContinuation))
1159
+ else if (
1160
+ v.Kind == ReferenceKind::LValue &&
1161
+ ref.ends_with (' &' ))
1215
1162
{
1216
- it_ = originalIt;
1163
+ // The second "&" might be in the next Text block
1164
+ potentialQualifiers.push_back (" &" );
1217
1165
}
1218
- else /* if (!functionContinuation.empty()) */
1166
+ potentialQualifiers.push_back (" noexcept" );
1167
+ auto const nextTextOpt = peekNextIt ();
1168
+ if (!nextTextOpt)
1219
1169
{
1220
- --it_;
1221
- std::string_view suffix = functionContinuation;
1222
- std::string_view leftover = functionContinuation;
1223
- bool foundAny = false ;
1224
- std::size_t totalRemoved = 0 ;
1225
- while (!suffix.empty ())
1226
- {
1227
- bool found = false ;
1228
- std::size_t const initialWhitespace = std::min (
1229
- suffix.find_first_not_of (" " ), suffix.size ());
1230
- for (auto const & q : qualifiers)
1231
- {
1232
- if (suffix.substr (initialWhitespace).starts_with (q))
1233
- {
1234
- std::size_t const toRemove = initialWhitespace + q.size ();
1235
- if (
1236
- contains (idChars, q.back ()) &&
1237
- suffix.size () > toRemove &&
1238
- contains (idChars, suffix[toRemove]))
1239
- {
1240
- // This is not a qualifier, but part of
1241
- // an identifier
1242
- continue ;
1243
- }
1244
- suffix.remove_prefix (toRemove);
1245
- totalRemoved += toRemove;
1246
- found = true ;
1247
- foundAny = true ;
1248
- break ;
1249
- }
1250
- }
1251
- if (!found)
1170
+ auto leftover = std::string (pres.ptr , last - pres.ptr );
1171
+ ref.erase (pres.ptr - first);
1172
+ return leftover;
1173
+ }
1174
+ std::string_view const nextText = *nextTextOpt;
1175
+ std::string_view const trimmed = ltrim (nextText);
1176
+ if (trimmed.empty () ||
1177
+ std::ranges::any_of (
1178
+ potentialQualifiers,
1179
+ [&](std::string_view s)
1252
1180
{
1253
- break ;
1254
- }
1255
- }
1256
- if (foundAny)
1257
- {
1258
- leftover = leftover.substr (0 , totalRemoved);
1259
- ref += leftover;
1260
- return std::string (suffix);
1261
- }
1181
+ return trimmed.starts_with (s);
1182
+ }))
1183
+ {
1184
+ ref += nextText;
1185
+ ++it_;
1186
+ continue ;
1262
1187
}
1263
1188
}
1264
- }
1265
1189
1266
-
1267
- // Clang refs can also contain invalid characters
1268
- // at the end, especially punctuation. We need to
1269
- // truncate the ref at the last valid identifier
1270
- // character.
1271
- // The last identifier character depends on the type
1272
- // of ref.
1273
- // - If it's an operator but not a function, then
1274
- // we also consider operator chars as valid.
1275
- // - If it's a function, then we also consider ')'
1276
- // as valid.
1277
- // - In all cases, we consider the identifier chars
1278
- // as valid.
1279
- static constexpr std::string_view operatorChars =
1280
- " ~!%^&*()-+=|[]{};:,.<>?/" ;
1281
- static constexpr std::string_view parenChars =
1282
- " ()" ;
1283
- std::string leftover;
1284
- bool const isRegularIdentifier = !isFunction && !isNoFunctionOperator;
1285
- if (isRegularIdentifier)
1286
- {
1287
- auto const lastIdChar = ref.find_last_of (idChars);
1288
- auto const firstLeftoverChar = lastIdChar + 1 ;
1289
- if (firstLeftoverChar < ref.size ())
1290
- {
1291
- leftover = std::string_view (ref).substr (lastIdChar + 1 );
1292
- ref = ref.substr (0 , lastIdChar + 1 );
1293
- }
1294
- }
1295
- else if (isFunction)
1296
- {
1297
- auto reservedCharsets = {idChars, parenChars};
1298
- auto reservedChars = std::views::join (reservedCharsets);
1299
- auto const lastIdOrParen = find_last_of (ref, reservedChars);
1300
- auto const firstLeftoverChar =
1301
- lastIdOrParen == ref.end () ?
1302
- ref.end () :
1303
- std::next (lastIdOrParen);
1304
- if (firstLeftoverChar != ref.end ())
1190
+ // The ref might have more components
1191
+ bool const mightHaveMoreComponents =
1192
+ !v.HasFunctionParameters ;
1193
+ if (mightHaveMoreComponents)
1305
1194
{
1306
- leftover = std::string_view (firstLeftoverChar, ref.end ());
1307
- ref = ref.substr (0 , std::distance (ref.begin (), firstLeftoverChar));
1308
- }
1309
- }
1310
- else /* if (isNoFunctionOperator) */
1311
- {
1312
- auto reservedCharsets = {idChars, operatorChars};
1313
- auto reservedChars = std::views::join (reservedCharsets);
1314
- auto const lastIdOrOperator = find_last_of (ref, reservedChars);
1315
- auto const firstLeftoverChar =
1316
- lastIdOrOperator == ref.end () ?
1317
- ref.end () :
1318
- std::next (lastIdOrOperator);
1319
- if (firstLeftoverChar != ref.end ())
1320
- {
1321
- leftover = std::string_view (firstLeftoverChar, ref.end ());
1322
- ref = ref.substr (0 , std::distance (ref.begin (), firstLeftoverChar));
1195
+ auto const nextTextOpt = peekNextIt ();
1196
+ if (!nextTextOpt)
1197
+ {
1198
+ auto leftover = std::string (pres.ptr , last - pres.ptr );
1199
+ ref.erase (pres.ptr - first);
1200
+ return leftover;
1201
+ }
1202
+ std::string_view const nextText = *nextTextOpt;
1203
+ std::string_view const trimmed = ltrim (nextText);
1204
+ static constexpr std::string_view idChars
1205
+ = " abcdefghijklmnopqrstuvwxyz"
1206
+ " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1207
+ " 0123456789"
1208
+ " _:" ;
1209
+ if (trimmed.empty () ||
1210
+ contains (idChars, trimmed.front ()))
1211
+ {
1212
+ ref += nextText;
1213
+ ++it_;
1214
+ continue ;
1215
+ }
1323
1216
}
1217
+
1218
+ return {};
1324
1219
}
1325
- return leftover;
1326
1220
}
1327
1221
1328
1222
// ------------------------------------------------
0 commit comments