Skip to content

Commit 0d2aa70

Browse files
ardbiesheuvelsuryasaimadhu
authored andcommitted
apei/ghes: Use xchg_release() for updating new cache slot instead of cmpxchg()
Some documentation first, about how this machinery works: It seems, the intent of the GHES error records cache is to collect already reported errors - see the ghes_estatus_cached() checks. There's even a sentence trying to say what this does: /* * GHES error status reporting throttle, to report more kinds of * errors, instead of just most frequently occurred errors. */ New elements are added to the cache this way: if (!ghes_estatus_cached(estatus)) { if (ghes_print_estatus(NULL, ghes->generic, estatus)) ghes_estatus_cache_add(ghes->generic, estatus); The intent being, once this new error record is reported, it gets cached so that it doesn't get reported for a while due to too many, same-type error records getting reported in burst-like scenarios. I.e., new, unreported error types can have a higher chance of getting reported. Now, the loop in ghes_estatus_cache_add() is trying to pick out the oldest element in there. Meaning, something which got reported already but a long while ago, i.e., a LRU-type scheme. And the cmpxchg() is there presumably to make sure when that selected element slot_cache is removed, it really *is* that element that gets removed and not one which replaced it in the meantime. Now, ghes_estatus_cache_add() selects a slot, and either succeeds in replacing its contents with a pointer to a newly cached item, or it just gives up and frees the new item again, without attempting to select another slot even if one might be available. Since only inserting new items is being done here, the race can only cause a failure if the selected slot was updated with another new item concurrently, which means that it is arbitrary which of those two items gets dropped. And "dropped" here means, the item doesn't get added to the cache so the next time it is seen, it'll get reported again and an insertion attempt will be done again. Eventually, it'll get inserted and all those times when the insertion fails, the item will get reported although the cache is supposed to prevent that and "ratelimit" those repeated error records. Not a big deal in any case. This means the cmpxchg() and the special case are not necessary. Therefore, just drop the existing item unconditionally. Move the xchg_release() and call_rcu() out of rcu_read_lock/unlock section since there is no actually dereferencing the pointer at all. [ bp: - Flesh out and summarize what was discussed on the thread now that that cache contraption is understood; - Touch up code style. ] Co-developed-by: Jia He <[email protected]> Signed-off-by: Jia He <[email protected]> Signed-off-by: Ard Biesheuvel <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 315bada commit 0d2aa70

File tree

1 file changed

+33
-27
lines changed

1 file changed

+33
-27
lines changed

drivers/acpi/apei/ghes.c

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ struct ghes_vendor_record_entry {
154154
static struct gen_pool *ghes_estatus_pool;
155155
static unsigned long ghes_estatus_pool_size_request;
156156

157-
static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
157+
static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
158158
static atomic_t ghes_estatus_cache_alloced;
159159

160160
static int ghes_panic_timeout __read_mostly = 30;
@@ -789,48 +789,42 @@ static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
789789
return cache;
790790
}
791791

792-
static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
792+
static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
793793
{
794+
struct ghes_estatus_cache *cache;
794795
u32 len;
795796

797+
cache = container_of(head, struct ghes_estatus_cache, rcu);
796798
len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
797799
len = GHES_ESTATUS_CACHE_LEN(len);
798800
gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
799801
atomic_dec(&ghes_estatus_cache_alloced);
800802
}
801803

802-
static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
803-
{
804-
struct ghes_estatus_cache *cache;
805-
806-
cache = container_of(head, struct ghes_estatus_cache, rcu);
807-
ghes_estatus_cache_free(cache);
808-
}
809-
810-
static void ghes_estatus_cache_add(
811-
struct acpi_hest_generic *generic,
812-
struct acpi_hest_generic_status *estatus)
804+
static void
805+
ghes_estatus_cache_add(struct acpi_hest_generic *generic,
806+
struct acpi_hest_generic_status *estatus)
813807
{
814-
int i, slot = -1, count;
815808
unsigned long long now, duration, period, max_period = 0;
816-
struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
809+
struct ghes_estatus_cache *cache, *new_cache;
810+
struct ghes_estatus_cache __rcu *victim;
811+
int i, slot = -1, count;
817812

818813
new_cache = ghes_estatus_cache_alloc(generic, estatus);
819-
if (new_cache == NULL)
814+
if (!new_cache)
820815
return;
816+
821817
rcu_read_lock();
822818
now = sched_clock();
823819
for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
824820
cache = rcu_dereference(ghes_estatus_caches[i]);
825821
if (cache == NULL) {
826822
slot = i;
827-
slot_cache = NULL;
828823
break;
829824
}
830825
duration = now - cache->time_in;
831826
if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
832827
slot = i;
833-
slot_cache = cache;
834828
break;
835829
}
836830
count = atomic_read(&cache->count);
@@ -839,18 +833,30 @@ static void ghes_estatus_cache_add(
839833
if (period > max_period) {
840834
max_period = period;
841835
slot = i;
842-
slot_cache = cache;
843836
}
844837
}
845-
/* new_cache must be put into array after its contents are written */
846-
smp_wmb();
847-
if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
848-
slot_cache, new_cache) == slot_cache) {
849-
if (slot_cache)
850-
call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
851-
} else
852-
ghes_estatus_cache_free(new_cache);
853838
rcu_read_unlock();
839+
840+
if (slot != -1) {
841+
/*
842+
* Use release semantics to ensure that ghes_estatus_cached()
843+
* running on another CPU will see the updated cache fields if
844+
* it can see the new value of the pointer.
845+
*/
846+
victim = xchg_release(&ghes_estatus_caches[slot],
847+
RCU_INITIALIZER(new_cache));
848+
849+
/*
850+
* At this point, victim may point to a cached item different
851+
* from the one based on which we selected the slot. Instead of
852+
* going to the loop again to pick another slot, let's just
853+
* drop the other item anyway: this may cause a false cache
854+
* miss later on, but that won't cause any problems.
855+
*/
856+
if (victim)
857+
call_rcu(&unrcu_pointer(victim)->rcu,
858+
ghes_estatus_cache_rcu_free);
859+
}
854860
}
855861

856862
static void __ghes_panic(struct ghes *ghes,

0 commit comments

Comments
 (0)