Skip to content

Commit 82d3edb

Browse files
committed
s390/cpum_sf: add READ_ONCE() semantics to compare and swap loops
The current cmpxchg_double() loops within the perf hw sampling code do not have READ_ONCE() semantics to read the old value from memory. This allows the compiler to generate code which reads the "old" value several times from memory, which again allows for inconsistencies. For example: /* Reset trailer (using compare-double-and-swap) */ do { te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; te_flags |= SDB_TE_ALERT_REQ_MASK; } while (!cmpxchg_double(&te->flags, &te->overflow, te->flags, te->overflow, te_flags, 0ULL)); The compiler could generate code where te->flags used within the cmpxchg_double() call may be refetched from memory and which is not necessarily identical to the previous read version which was used to generate te_flags. Which in turn means that an incorrect update could happen. Fix this by adding READ_ONCE() semantics to all cmpxchg_double() loops. Given that READ_ONCE() cannot generate code on s390 which atomically reads 16 bytes, use a private compare-and-swap-double implementation to achieve that. Also replace cmpxchg_double() with the private implementation to be able to re-use the old value within the loops. As a side effect this converts the whole code to only use bit fields to read and modify bits within the hws trailer header. Reported-by: Alexander Gordeev <[email protected]> Acked-by: Alexander Gordeev <[email protected]> Acked-by: Hendrik Brueckner <[email protected]> Reviewed-by: Thomas Richter <[email protected]> Cc: <[email protected]> Link: https://lore.kernel.org/linux-s390/Y71QJBhNTIatvxUT@osiris/T/#ma14e2a5f7aa8ed4b94b6f9576799b3ad9c60f333 Signed-off-by: Heiko Carstens <[email protected]>
1 parent c2337a4 commit 82d3edb

File tree

2 files changed

+77
-55
lines changed

2 files changed

+77
-55
lines changed

arch/s390/include/asm/cpu_mf.h

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -131,19 +131,21 @@ struct hws_combined_entry {
131131
struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
132132
} __packed;
133133

134-
struct hws_trailer_entry {
135-
union {
136-
struct {
137-
unsigned int f:1; /* 0 - Block Full Indicator */
138-
unsigned int a:1; /* 1 - Alert request control */
139-
unsigned int t:1; /* 2 - Timestamp format */
140-
unsigned int :29; /* 3 - 31: Reserved */
141-
unsigned int bsdes:16; /* 32-47: size of basic SDE */
142-
unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
143-
};
144-
unsigned long long flags; /* 0 - 63: All indicators */
134+
union hws_trailer_header {
135+
struct {
136+
unsigned int f:1; /* 0 - Block Full Indicator */
137+
unsigned int a:1; /* 1 - Alert request control */
138+
unsigned int t:1; /* 2 - Timestamp format */
139+
unsigned int :29; /* 3 - 31: Reserved */
140+
unsigned int bsdes:16; /* 32-47: size of basic SDE */
141+
unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
142+
unsigned long long overflow; /* 64 - Overflow Count */
145143
};
146-
unsigned long long overflow; /* 64 - sample Overflow count */
144+
__uint128_t val;
145+
};
146+
147+
struct hws_trailer_entry {
148+
union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */
147149
unsigned char timestamp[16]; /* 16 - 31 timestamp */
148150
unsigned long long reserved1; /* 32 -Reserved */
149151
unsigned long long reserved2; /* */
@@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
290292
return USEC_PER_SEC * qsi->cpu_speed / rate;
291293
}
292294

293-
#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
294-
#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
295-
296295
/* Return TOD timestamp contained in an trailer entry */
297296
static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
298297
{
299298
/* TOD in STCKE format */
300-
if (te->t)
299+
if (te->header.t)
301300
return *((unsigned long long *) &te->timestamp[1]);
302301

303302
/* TOD in STCK format */

arch/s390/kernel/perf_cpum_sf.c

Lines changed: 62 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
163163

164164
static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
165165
{
166-
unsigned long sdb, *trailer;
166+
struct hws_trailer_entry *te;
167+
unsigned long sdb;
167168

168169
/* Allocate and initialize sample-data-block */
169170
sdb = get_zeroed_page(gfp_flags);
170171
if (!sdb)
171172
return -ENOMEM;
172-
trailer = trailer_entry_ptr(sdb);
173-
*trailer = SDB_TE_ALERT_REQ_MASK;
173+
te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
174+
te->header.a = 1;
174175

175176
/* Link SDB into the sample-data-block-table */
176177
*sdbt = sdb;
@@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
12061207
"%s: Found unknown"
12071208
" sampling data entry: te->f %i"
12081209
" basic.def %#4x (%p)\n", __func__,
1209-
te->f, sample->def, sample);
1210+
te->header.f, sample->def, sample);
12101211
/* Sample slot is not yet written or other record.
12111212
*
12121213
* This condition can occur if the buffer was reused
@@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
12171218
* that are not full. Stop processing if the first
12181219
* invalid format was detected.
12191220
*/
1220-
if (!te->f)
1221+
if (!te->header.f)
12211222
break;
12221223
}
12231224

@@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
12271228
}
12281229
}
12291230

1231+
static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
1232+
{
1233+
asm volatile(
1234+
" cdsg %[old],%[new],%[ptr]\n"
1235+
: [old] "+d" (old), [ptr] "+QS" (*ptr)
1236+
: [new] "d" (new)
1237+
: "memory", "cc");
1238+
return old;
1239+
}
1240+
12301241
/* hw_perf_event_update() - Process sampling buffer
12311242
* @event: The perf event
12321243
* @flush_all: Flag to also flush partially filled sample-data-blocks
@@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
12431254
*/
12441255
static void hw_perf_event_update(struct perf_event *event, int flush_all)
12451256
{
1257+
unsigned long long event_overflow, sampl_overflow, num_sdb;
1258+
union hws_trailer_header old, prev, new;
12461259
struct hw_perf_event *hwc = &event->hw;
12471260
struct hws_trailer_entry *te;
12481261
unsigned long *sdbt;
1249-
unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
12501262
int done;
12511263

12521264
/*
@@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
12661278
te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
12671279

12681280
/* Leave loop if no more work to do (block full indicator) */
1269-
if (!te->f) {
1281+
if (!te->header.f) {
12701282
done = 1;
12711283
if (!flush_all)
12721284
break;
12731285
}
12741286

12751287
/* Check the sample overflow count */
1276-
if (te->overflow)
1288+
if (te->header.overflow)
12771289
/* Account sample overflows and, if a particular limit
12781290
* is reached, extend the sampling buffer.
12791291
* For details, see sfb_account_overflows().
12801292
*/
1281-
sampl_overflow += te->overflow;
1293+
sampl_overflow += te->header.overflow;
12821294

12831295
/* Timestamps are valid for full sample-data-blocks only */
12841296
debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
12851297
"overflow %llu timestamp %#llx\n",
1286-
__func__, (unsigned long)sdbt, te->overflow,
1287-
(te->f) ? trailer_timestamp(te) : 0ULL);
1298+
__func__, (unsigned long)sdbt, te->header.overflow,
1299+
(te->header.f) ? trailer_timestamp(te) : 0ULL);
12881300

12891301
/* Collect all samples from a single sample-data-block and
12901302
* flag if an (perf) event overflow happened. If so, the PMU
@@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
12941306
num_sdb++;
12951307

12961308
/* Reset trailer (using compare-double-and-swap) */
1309+
/* READ_ONCE() 16 byte header */
1310+
prev.val = __cdsg(&te->header.val, 0, 0);
12971311
do {
1298-
te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
1299-
te_flags |= SDB_TE_ALERT_REQ_MASK;
1300-
} while (!cmpxchg_double(&te->flags, &te->overflow,
1301-
te->flags, te->overflow,
1302-
te_flags, 0ULL));
1312+
old.val = prev.val;
1313+
new.val = prev.val;
1314+
new.f = 0;
1315+
new.a = 1;
1316+
new.overflow = 0;
1317+
prev.val = __cdsg(&te->header.val, old.val, new.val);
1318+
} while (prev.val != old.val);
13031319

13041320
/* Advance to next sample-data-block */
13051321
sdbt++;
@@ -1384,15 +1400,15 @@ static void aux_output_end(struct perf_output_handle *handle)
13841400
range_scan = AUX_SDB_NUM_ALERT(aux);
13851401
for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
13861402
te = aux_sdb_trailer(aux, idx);
1387-
if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
1403+
if (!te->header.f)
13881404
break;
13891405
}
13901406
/* i is num of SDBs which are full */
13911407
perf_aux_output_end(handle, i << PAGE_SHIFT);
13921408

13931409
/* Remove alert indicators in the buffer */
13941410
te = aux_sdb_trailer(aux, aux->alert_mark);
1395-
te->flags &= ~SDB_TE_ALERT_REQ_MASK;
1411+
te->header.a = 0;
13961412

13971413
debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
13981414
__func__, i, range_scan, aux->head);
@@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle,
14371453
idx = aux->empty_mark + 1;
14381454
for (i = 0; i < range_scan; i++, idx++) {
14391455
te = aux_sdb_trailer(aux, idx);
1440-
te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
1441-
SDB_TE_ALERT_REQ_MASK);
1442-
te->overflow = 0;
1456+
te->header.f = 0;
1457+
te->header.a = 0;
1458+
te->header.overflow = 0;
14431459
}
14441460
/* Save the position of empty SDBs */
14451461
aux->empty_mark = aux->head + range - 1;
@@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
14481464
/* Set alert indicator */
14491465
aux->alert_mark = aux->head + range/2 - 1;
14501466
te = aux_sdb_trailer(aux, aux->alert_mark);
1451-
te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
1467+
te->header.a = 1;
14521468

14531469
/* Reset hardware buffer head */
14541470
head = AUX_SDB_INDEX(aux, aux->head);
@@ -1475,25 +1491,28 @@ static int aux_output_begin(struct perf_output_handle *handle,
14751491
static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
14761492
unsigned long long *overflow)
14771493
{
1478-
unsigned long long orig_overflow, orig_flags, new_flags;
1494+
union hws_trailer_header old, prev, new;
14791495
struct hws_trailer_entry *te;
14801496

14811497
te = aux_sdb_trailer(aux, alert_index);
1498+
/* READ_ONCE() 16 byte header */
1499+
prev.val = __cdsg(&te->header.val, 0, 0);
14821500
do {
1483-
orig_flags = te->flags;
1484-
*overflow = orig_overflow = te->overflow;
1485-
if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
1501+
old.val = prev.val;
1502+
new.val = prev.val;
1503+
*overflow = old.overflow;
1504+
if (old.f) {
14861505
/*
14871506
* SDB is already set by hardware.
14881507
* Abort and try to set somewhere
14891508
* behind.
14901509
*/
14911510
return false;
14921511
}
1493-
new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
1494-
} while (!cmpxchg_double(&te->flags, &te->overflow,
1495-
orig_flags, orig_overflow,
1496-
new_flags, 0ULL));
1512+
new.a = 1;
1513+
new.overflow = 0;
1514+
prev.val = __cdsg(&te->header.val, old.val, new.val);
1515+
} while (prev.val != old.val);
14971516
return true;
14981517
}
14991518

@@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
15221541
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
15231542
unsigned long long *overflow)
15241543
{
1525-
unsigned long long orig_overflow, orig_flags, new_flags;
15261544
unsigned long i, range_scan, idx, idx_old;
1545+
union hws_trailer_header old, prev, new;
1546+
unsigned long long orig_overflow;
15271547
struct hws_trailer_entry *te;
15281548

15291549
debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
@@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
15541574
idx_old = idx = aux->empty_mark + 1;
15551575
for (i = 0; i < range_scan; i++, idx++) {
15561576
te = aux_sdb_trailer(aux, idx);
1577+
/* READ_ONCE() 16 byte header */
1578+
prev.val = __cdsg(&te->header.val, 0, 0);
15571579
do {
1558-
orig_flags = te->flags;
1559-
orig_overflow = te->overflow;
1560-
new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
1580+
old.val = prev.val;
1581+
new.val = prev.val;
1582+
orig_overflow = old.overflow;
1583+
new.f = 0;
1584+
new.overflow = 0;
15611585
if (idx == aux->alert_mark)
1562-
new_flags |= SDB_TE_ALERT_REQ_MASK;
1586+
new.a = 1;
15631587
else
1564-
new_flags &= ~SDB_TE_ALERT_REQ_MASK;
1565-
} while (!cmpxchg_double(&te->flags, &te->overflow,
1566-
orig_flags, orig_overflow,
1567-
new_flags, 0ULL));
1588+
new.a = 0;
1589+
prev.val = __cdsg(&te->header.val, old.val, new.val);
1590+
} while (prev.val != old.val);
15681591
*overflow += orig_overflow;
15691592
}
15701593

0 commit comments

Comments
 (0)