Skip to content

Commit 3039bde

Browse files
yfeldblummeta-codesync[bot]
authored andcommitted
cut fast-path indirection in FormattedKeyHolder
Summary: When the current lookup is found in the last-cache, avoid an indirection in returning the formatted-key and avoid an indirection in returning the cached-value. Reviewed By: DenisYaroshevskiy Differential Revision: D59570652 fbshipit-source-id: 3859086bf33095b03aa03454eb9cdf4819c5b74f
1 parent 80c1f87 commit 3039bde

File tree

4 files changed

+76
-52
lines changed

4 files changed

+76
-52
lines changed

fb303/BUCK

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ cpp_library(
238238
"//folly/container:f14_hash",
239239
"//folly/experimental:function_scheduler",
240240
"//folly/hash:rapidhash",
241+
"//folly/lang:align",
241242
"//folly/synchronization:call_once",
242243
],
243244
)

fb303/ThreadCachedServiceData.h

Lines changed: 55 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <folly/container/F14Map.h>
4343
#include <folly/experimental/FunctionScheduler.h>
4444
#include <folly/hash/rapidhash.h>
45+
#include <folly/lang/Align.h>
4546
#include <folly/synchronization/CallOnce.h>
4647

4748
namespace facebook::fb303 {
@@ -423,27 +424,26 @@ struct HistogramSpec {
423424
}
424425
};
425426

426-
namespace detail {
427-
struct Nothing {};
428-
429-
template <typename CachedFieldType>
430-
struct CachedStorage {
431-
const std::string* key;
432-
CachedFieldType cached;
433-
};
434-
435-
template <>
436-
struct CachedStorage<detail::Nothing> {
437-
const std::string* key;
438-
[[FOLLY_ATTR_NO_UNIQUE_ADDRESS]] detail::Nothing cached;
439-
};
440-
441-
} // namespace detail
442-
443-
template <size_t N, typename CachedType = detail::Nothing>
427+
template <size_t N, typename CacheFun = folly::variadic_noop_fn>
444428
class FormattedKeyHolder {
445429
private:
446-
using ValueType = detail::CachedStorage<CachedType>;
430+
template <typename T>
431+
using ElementTypeOf = typename T::element_type;
432+
using CacheFunRes = std::invoke_result_t<CacheFun const&, std::string_view>;
433+
using CacheValue = folly::detected_or_t<void, ElementTypeOf, CacheFunRes>;
434+
using CacheValueRef = std::add_lvalue_reference_t<CacheValue>;
435+
struct ValueTypeVoid {
436+
const std::string* key;
437+
[[no_unique_address]] std::monostate cached;
438+
};
439+
struct ValueTypeReal {
440+
const std::string* key;
441+
std::shared_ptr<CacheValue> cached;
442+
};
443+
using ValueType = std::conditional_t< //
444+
std::is_void_v<CacheValue>,
445+
ValueTypeVoid,
446+
ValueTypeReal>;
447447

448448
public:
449449
using Subkey = std::variant<int64_t, std::string>;
@@ -585,10 +585,15 @@ class FormattedKeyHolder {
585585
SubkeyArrayHash,
586586
SubkeyArrayEqualTo>;
587587
using LocalMapIterator = typename LocalMap::iterator;
588+
struct LocalEntry {
589+
const std::string* key{};
590+
CacheValue* cached{};
591+
};
592+
static_assert(folly::kMscVer || folly::is_register_pass_v<LocalEntry>);
588593
struct LocalMapItemRef {
589594
size_t hash{};
590595
const SubkeyArray* key{}; // null at init and after erase
591-
ValueType* value{};
596+
LocalEntry entry{};
592597
};
593598
static inline constexpr size_t LocalMapAndLastAlign =
594599
folly::hardware_constructive_interference_size;
@@ -625,10 +630,10 @@ class FormattedKeyHolder {
625630
* v.s. via the stack and defers the dereference operation to the cold path.
626631
*/
627632
template <typename... Args>
628-
std::pair<const std::string&, std::reference_wrapper<CachedType>>
633+
std::pair<const std::string&, std::reference_wrapper<CacheValue>>
629634
getFormattedKeyWithExtra(Args&&... subkeys) {
630-
auto& v = getFormattedEntry(std::forward<Args>(subkeys)...);
631-
return {*v.key, v.cached};
635+
auto entry = getFormattedEntry(std::forward<Args>(subkeys)...);
636+
return {*entry.key, *entry.cached};
632637
}
633638

634639
template <typename... Args>
@@ -653,7 +658,7 @@ class FormattedKeyHolder {
653658

654659
private:
655660
template <typename... Args>
656-
ValueType& getFormattedEntry(Args&&... subkeys) {
661+
LocalEntry getFormattedEntry(Args&&... subkeys) {
657662
static_assert(sizeof...(Args) == N, "Incorrect number of subkeys.");
658663
static_assert(
659664
(IsValidSubkey<folly::remove_cvref_t<Args>> && ...),
@@ -666,7 +671,7 @@ class FormattedKeyHolder {
666671
if (FOLLY_LIKELY(
667672
local.last.key && keyhash == local.last.hash &&
668673
local.map.key_eq()(*local.last.key, keytup))) {
669-
return *local.last.value;
674+
return local.last.entry;
670675
}
671676
// calling outline folly::get_default would be a small perf hit, so call
672677
auto it = local.map.find( // map-find inline to avoid it
@@ -678,8 +683,12 @@ class FormattedKeyHolder {
678683
// if the map was updated, existing references held in local.last would be
679684
// invalid ... but we reset them all to the just-found item anyway, so
680685
// we do not hold onto invalid references
681-
local.last = {keyhash, &it->first.get(), &it->second};
682-
return it->second;
686+
auto entry = LocalEntry{it->second.key};
687+
if constexpr (!std::is_void_v<CacheValue>) {
688+
entry.cached = it->second.cached.get();
689+
}
690+
local.last = {keyhash, &it->first.get(), entry};
691+
return entry;
683692
}
684693

685694
template <typename T>
@@ -706,7 +715,12 @@ class FormattedKeyHolder {
706715
mkvar_<int64_t>{}, mkvar_<std::string_view, std::string>{});
707716
auto& local = *localMap_;
708717
auto const& [k, v] = getFormattedKeyGlobal({mkvar(subkeys)...});
709-
return local.map.emplace(std::cref(k), ValueType{&v, CachedType{}}).first;
718+
ValueType value{&v, {}};
719+
if constexpr (!std::is_void_v<CacheValue>) {
720+
CacheFun const fn{};
721+
value.cached = fn(std::string_view{v});
722+
}
723+
return local.map.emplace(std::cref(k), std::move(value)).first;
710724
}
711725

712726
/**
@@ -1363,10 +1377,18 @@ class SubminuteMinuteOnlyHistogram : public HistogramWrapper {
13631377
*/
13641378
template <int N> // N is the number of subkeys.
13651379
class DynamicTimeseriesWrapper {
1380+
private:
1381+
struct GetTLTimeseriesFn {
1382+
std::shared_ptr<ThreadCachedServiceData::TLTimeseries> operator()(
1383+
std::string_view key) const {
1384+
ThreadCachedServiceData::ThreadLocalStatsMap& tcData =
1385+
*ThreadCachedServiceData::getStatsThreadLocal();
1386+
return tcData.getTimeseriesSafe(key);
1387+
}
1388+
};
1389+
13661390
public:
1367-
using KeyHolder = internal::FormattedKeyHolder<
1368-
N,
1369-
std::shared_ptr<ThreadCachedServiceData::TLTimeseries>>;
1391+
using KeyHolder = internal::FormattedKeyHolder<N, GetTLTimeseriesFn>;
13701392

13711393
DynamicTimeseriesWrapper(
13721394
std::string keyFormat,
@@ -1481,14 +1503,8 @@ class DynamicTimeseriesWrapper {
14811503

14821504
template <typename... Args>
14831505
void addImpl(int64_t value, Args... subkeys) {
1484-
auto key = key_.getFormattedKeyWithExtra(subkeys...);
1485-
if (key.second.get() == nullptr) {
1486-
ThreadCachedServiceData::ThreadLocalStatsMap& tcData =
1487-
*ThreadCachedServiceData::getStatsThreadLocal();
1488-
// Cache thread local counter
1489-
key.second.get() = tcData.getTimeseriesSafe(key.first);
1490-
}
1491-
key.second.get()->addValue(value);
1506+
auto entry = key_.getFormattedKeyWithExtra(std::forward<Args>(subkeys)...);
1507+
entry.second.get().addValue(value);
14921508
}
14931509

14941510
inline ThreadCachedServiceData::ThreadLocalStatsMap& tcData() {

fb303/detail/QuantileStatWrappers-inl.h

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,21 @@
2121

2222
namespace facebook::fb303::detail {
2323

24+
template <size_t N>
25+
void DynamicQuantileStatWrapper<N>::doPrepareKey(std::string_view key) {
26+
ServiceData::get()->getQuantileStat(
27+
key, spec_.stats, spec_.quantiles, spec_.timeseriesLengths);
28+
}
29+
2430
template <size_t N>
2531
DynamicQuantileStatWrapper<N>::DynamicQuantileStatWrapper(
2632
std::string keyFormat,
2733
folly::Range<const ExportType*> stats,
2834
folly::Range<const double*> quantiles,
2935
folly::Range<const size_t*> timeseriesLengths)
30-
: key_(std::move(keyFormat), nullptr) {
36+
: key_(std::move(keyFormat), [this](std::string_view key) {
37+
doPrepareKey(key);
38+
}) {
3139
spec_.stats.insert(spec_.stats.end(), stats.begin(), stats.end());
3240
spec_.quantiles.insert(
3341
spec_.quantiles.end(), quantiles.begin(), quantiles.end());
@@ -43,16 +51,8 @@ void DynamicQuantileStatWrapper<N>::addValue(
4351
double value,
4452
std::chrono::steady_clock::time_point now,
4553
Args&&... subkeys) {
46-
auto key = key_.getFormattedKeyWithExtra(std::forward<Args>(subkeys)...);
47-
if (key.second.get() == nullptr) {
48-
auto& cache = *cache_;
49-
auto ptr = folly::get_ptr(cache, key.first);
50-
if (!ptr) {
51-
key.second.get() = cache[key.first] = ServiceData::get()->getQuantileStat(
52-
key.first, spec_.stats, spec_.quantiles, spec_.timeseriesLengths);
53-
}
54-
}
55-
key.second.get()->addValue(value, now);
54+
auto entry = key_.getFormattedKeyWithExtra(std::forward<Args>(subkeys)...);
55+
return entry.second.get().addValue(value, now);
5656
}
5757

5858
template <size_t N>

fb303/detail/QuantileStatWrappers.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,15 @@ class DynamicQuantileStatWrapper {
8181
using StatPtr = std::shared_ptr<QuantileStat>;
8282
using Cache = folly::F14FastMap<std::string_view, StatPtr>;
8383

84-
internal::FormattedKeyHolder<N, StatPtr> key_;
85-
folly::ThreadLocal<Cache> cache_;
84+
struct GetTLQuantileStatFn {
85+
std::shared_ptr<QuantileStat> operator()(std::string_view key) const {
86+
return ServiceData::get()->getQuantileStat(key);
87+
}
88+
};
89+
90+
void doPrepareKey(std::string_view key);
91+
92+
internal::FormattedKeyHolder<N, GetTLQuantileStatFn> key_;
8693
Spec spec_;
8794
};
8895

0 commit comments

Comments
 (0)