Skip to content

Commit 35c8fad

Browse files
committed
Merge tag 'perf-tools-for-v5.16-2021-11-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tools updates from Arnaldo Carvalho de Melo: "Hardware tracing: - ARM: * Print the buffer size consistently in hexadecimal in ARM Coresight. * Add Coresight snapshot mode support. * Update --switch-events docs in 'perf record'. * Support hardware-based PID tracing. * Track task context switch for cpu-mode events. - Vendor events: * Add metric events JSON file for power10 platform perf test: - Get 'perf test' unit tests closer to kunit. - Topology tests improvements. - Remove bashisms from some tests. perf bench: - Fix memory leak of perf_cpu_map__new() in the futex benchmarks. libbpf: - Add some more weak libbpf functions to allow building with the older libbpf versions present in distros. libbeauty: - Translate [gs]etsockopt 'level' argument integer values to strings. tools headers UAPI: - Sync futex_waitv, arch prctl, sound, i915_drm and msr-index files with the kernel sources. Documentation: - Add documentation to 'struct symbol'. - Synchronize the definition of enum perf_hw_id with code in tools/perf/design.txt" * tag 'perf-tools-for-v5.16-2021-11-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (67 commits) perf tests: Remove bash constructs from stat_all_pmu.sh perf tests: Remove bash construct from record+zstd_comp_decomp.sh perf test: Remove bash construct from stat_bpf_counters.sh test perf bench futex: Fix memory leak of perf_cpu_map__new() tools arch x86: Sync the msr-index.h copy with the kernel sources tools headers UAPI: Sync drm/i915_drm.h with the kernel sources tools headers UAPI: Sync sound/asound.h with the kernel sources tools headers UAPI: Sync linux/prctl.h with the kernel sources tools headers UAPI: Sync arch prctl headers with the kernel sources perf tools: Add more weak libbpf functions perf bpf: Avoid memory leak from perf_env__insert_btf() perf symbols: Factor out annotation init/exit perf symbols: Bit pack to save a byte perf symbols: Add documentation to 'struct symbol' tools headers UAPI: 
Sync files changed by new futex_waitv syscall perf test bpf: Use ARRAY_CHECK() instead of ad-hoc equivalent, addressing array_size.cocci warning perf arm-spe: Support hardware-based PID tracing perf arm-spe: Save context ID in record perf arm-spe: Update --switch-events docs in 'perf record' perf arm-spe: Track task context switch for cpu-mode events ...
2 parents c8c1095 + ac96f46 commit 35c8fad

File tree

131 files changed

+2639
-1148
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

131 files changed

+2639
-1148
lines changed

tools/arch/x86/include/asm/msr-index.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,8 @@
625625

626626
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
627627

628+
#define MSR_IA32_XFD 0x000001c4
629+
#define MSR_IA32_XFD_ERR 0x000001c5
628630
#define MSR_IA32_XSS 0x00000da0
629631

630632
#define MSR_IA32_APICBASE 0x0000001b

tools/arch/x86/include/uapi/asm/prctl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#define ARCH_GET_CPUID 0x1011
1111
#define ARCH_SET_CPUID 0x1012
1212

13+
#define ARCH_GET_XCOMP_SUPP 0x1021
14+
#define ARCH_GET_XCOMP_PERM 0x1022
15+
#define ARCH_REQ_XCOMP_PERM 0x1023
16+
1317
#define ARCH_MAP_VDSO_X32 0x2001
1418
#define ARCH_MAP_VDSO_32 0x2002
1519
#define ARCH_MAP_VDSO_64 0x2003

tools/include/uapi/asm-generic/unistd.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
880880
#define __NR_process_mrelease 448
881881
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
882882

883+
#define __NR_futex_waitv 449
884+
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
885+
883886
#undef __NR_syscalls
884-
#define __NR_syscalls 449
887+
#define __NR_syscalls 450
885888

886889
/*
887890
* 32 bit systems traditionally used different

tools/include/uapi/drm/i915_drm.h

Lines changed: 241 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1522,6 +1522,12 @@ struct drm_i915_gem_caching {
15221522
#define I915_TILING_NONE 0
15231523
#define I915_TILING_X 1
15241524
#define I915_TILING_Y 2
1525+
/*
1526+
* Do not add new tiling types here. The I915_TILING_* values are for
1527+
* de-tiling fence registers that no longer exist on modern platforms. Although
1528+
* the hardware may support new types of tiling in general (e.g., Tile4), we
1529+
* do not need to add them to the uapi that is specific to now-defunct ioctls.
1530+
*/
15251531
#define I915_TILING_LAST I915_TILING_Y
15261532

15271533
#define I915_BIT_6_SWIZZLE_NONE 0
@@ -1824,6 +1830,7 @@ struct drm_i915_gem_context_param {
18241830
* Extensions:
18251831
* i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
18261832
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
1833+
* i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
18271834
*/
18281835
#define I915_CONTEXT_PARAM_ENGINES 0xa
18291836

@@ -1846,6 +1853,55 @@ struct drm_i915_gem_context_param {
18461853
* attempted to use it, never re-use this context param number.
18471854
*/
18481855
#define I915_CONTEXT_PARAM_RINGSIZE 0xc
1856+
1857+
/*
1858+
* I915_CONTEXT_PARAM_PROTECTED_CONTENT:
1859+
*
1860+
* Mark that the context makes use of protected content, which will result
1861+
* in the context being invalidated when the protected content session is.
1862+
* Given that the protected content session is killed on suspend, the device
1863+
* is kept awake for the lifetime of a protected context, so the user should
1864+
* make sure to dispose of them once done.
1865+
* This flag can only be set at context creation time and, when set to true,
1866+
* must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE
1867+
* to false. This flag can't be set to true in conjunction with setting the
1868+
* I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example:
1869+
*
1870+
* .. code-block:: C
1871+
*
1872+
* struct drm_i915_gem_context_create_ext_setparam p_protected = {
1873+
* .base = {
1874+
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
1875+
* },
1876+
* .param = {
1877+
* .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT,
1878+
* .value = 1,
1879+
* }
1880+
* };
1881+
* struct drm_i915_gem_context_create_ext_setparam p_norecover = {
1882+
* .base = {
1883+
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
1884+
* .next_extension = to_user_pointer(&p_protected),
1885+
* },
1886+
* .param = {
1887+
* .param = I915_CONTEXT_PARAM_RECOVERABLE,
1888+
* .value = 0,
1889+
* }
1890+
* };
1891+
* struct drm_i915_gem_context_create_ext create = {
1892+
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
1893+
* .extensions = to_user_pointer(&p_norecover),
1894+
* };
1895+
*
1896+
* ctx_id = gem_context_create_ext(drm_fd, &create);
1897+
*
1898+
* In addition to the normal failure cases, setting this flag during context
1899+
* creation can result in the following errors:
1900+
*
1901+
* -ENODEV: feature not available
1902+
* -EPERM: trying to mark a recoverable or not bannable context as protected
1903+
*/
1904+
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
18491905
/* Must be kept compact -- no holes and well documented */
18501906

18511907
__u64 value;
@@ -2049,6 +2105,135 @@ struct i915_context_engines_bond {
20492105
struct i915_engine_class_instance engines[N__]; \
20502106
} __attribute__((packed)) name__
20512107

2108+
/**
2109+
* struct i915_context_engines_parallel_submit - Configure engine for
2110+
* parallel submission.
2111+
*
2112+
* Setup a slot in the context engine map to allow multiple BBs to be submitted
2113+
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
2114+
* in parallel. Multiple hardware contexts are created internally in the i915 to
2115+
* run these BBs. Once a slot is configured for N BBs only N BBs can be
2116+
* submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
2117+
* doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
2118+
* many BBs there are based on the slot's configuration. The N BBs are the last
2119+
* N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
2120+
*
2121+
* The default placement behavior is to create implicit bonds between each
2122+
* context if each context maps to more than 1 physical engine (e.g. context is
2123+
* a virtual engine). Also we only allow contexts of same engine class and these
2124+
* contexts must be in logically contiguous order. Examples of the placement
2125+
* behavior are described below. Lastly, the default is to not allow BBs to be
2126+
* preempted mid-batch. Rather insert coordinated preemption points on all
2127+
* hardware contexts between each set of BBs. Flags could be added in the future
2128+
* to change both of these default behaviors.
2129+
*
2130+
* Returns -EINVAL if hardware context placement configuration is invalid or if
2131+
* the placement configuration isn't supported on the platform / submission
2132+
* interface.
2133+
* Returns -ENODEV if extension isn't supported on the platform / submission
2134+
* interface.
2135+
*
2136+
* .. code-block:: none
2137+
*
2138+
* Examples syntax:
2139+
* CS[X] = generic engine of same class, logical instance X
2140+
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
2141+
*
2142+
* Example 1 pseudo code:
2143+
* set_engines(INVALID)
2144+
* set_parallel(engine_index=0, width=2, num_siblings=1,
2145+
* engines=CS[0],CS[1])
2146+
*
2147+
* Results in the following valid placement:
2148+
* CS[0], CS[1]
2149+
*
2150+
* Example 2 pseudo code:
2151+
* set_engines(INVALID)
2152+
* set_parallel(engine_index=0, width=2, num_siblings=2,
2153+
* engines=CS[0],CS[2],CS[1],CS[3])
2154+
*
2155+
* Results in the following valid placements:
2156+
* CS[0], CS[1]
2157+
* CS[2], CS[3]
2158+
*
2159+
* This can be thought of as two virtual engines, each containing two
2160+
* engines thereby making a 2D array. However, there are bonds tying the
2161+
* entries together and placing restrictions on how they can be scheduled.
2162+
* Specifically, the scheduler can choose only vertical columns from the 2D
2163+
* array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
2164+
* scheduler wants to submit to CS[0], it must also choose CS[1] and vice
2165+
* versa. Same for CS[2] requires also using CS[3].
2166+
* VE[0] = CS[0], CS[2]
2167+
* VE[1] = CS[1], CS[3]
2168+
*
2169+
* Example 3 pseudo code:
2170+
* set_engines(INVALID)
2171+
* set_parallel(engine_index=0, width=2, num_siblings=2,
2172+
* engines=CS[0],CS[1],CS[1],CS[3])
2173+
*
2174+
* Results in the following valid and invalid placements:
2175+
* CS[0], CS[1]
2176+
* CS[1], CS[3] - Not logically contiguous, return -EINVAL
2177+
*/
2178+
struct i915_context_engines_parallel_submit {
2179+
/**
2180+
* @base: base user extension.
2181+
*/
2182+
struct i915_user_extension base;
2183+
2184+
/**
2185+
* @engine_index: slot for parallel engine
2186+
*/
2187+
__u16 engine_index;
2188+
2189+
/**
2190+
* @width: number of contexts per parallel engine or in other words the
2191+
* number of batches in each submission
2192+
*/
2193+
__u16 width;
2194+
2195+
/**
2196+
* @num_siblings: number of siblings per context or in other words the
2197+
* number of possible placements for each submission
2198+
*/
2199+
__u16 num_siblings;
2200+
2201+
/**
2202+
* @mbz16: reserved for future use; must be zero
2203+
*/
2204+
__u16 mbz16;
2205+
2206+
/**
2207+
* @flags: all undefined flags must be zero; currently no flags are defined
2208+
*/
2209+
__u64 flags;
2210+
2211+
/**
2212+
* @mbz64: reserved for future use; must be zero
2213+
*/
2214+
__u64 mbz64[3];
2215+
2216+
/**
2217+
* @engines: 2-d array of engine instances to configure parallel engine
2218+
*
2219+
* length = width (i) * num_siblings (j)
2220+
* index = j + i * num_siblings
2221+
*/
2222+
struct i915_engine_class_instance engines[0];
2223+
2224+
} __packed;
2225+
2226+
#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
2227+
struct i915_user_extension base; \
2228+
__u16 engine_index; \
2229+
__u16 width; \
2230+
__u16 num_siblings; \
2231+
__u16 mbz16; \
2232+
__u64 flags; \
2233+
__u64 mbz64[3]; \
2234+
struct i915_engine_class_instance engines[N__]; \
2235+
} __attribute__((packed)) name__
2236+
20522237
/**
20532238
* DOC: Context Engine Map uAPI
20542239
*
@@ -2108,6 +2293,7 @@ struct i915_context_param_engines {
21082293
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
21092294
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
21102295
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
2296+
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
21112297
struct i915_engine_class_instance engines[0];
21122298
} __attribute__((packed));
21132299

@@ -2726,14 +2912,20 @@ struct drm_i915_engine_info {
27262912

27272913
/** @flags: Engine flags. */
27282914
__u64 flags;
2915+
#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0)
27292916

27302917
/** @capabilities: Capabilities of this engine. */
27312918
__u64 capabilities;
27322919
#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0)
27332920
#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1)
27342921

2922+
/** @logical_instance: Logical instance of engine */
2923+
__u16 logical_instance;
2924+
27352925
/** @rsvd1: Reserved fields. */
2736-
__u64 rsvd1[4];
2926+
__u16 rsvd1[3];
2927+
/** @rsvd2: Reserved fields. */
2928+
__u64 rsvd2[3];
27372929
};
27382930

27392931
/**
@@ -2979,8 +3171,12 @@ struct drm_i915_gem_create_ext {
29793171
*
29803172
* For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
29813173
* struct drm_i915_gem_create_ext_memory_regions.
3174+
*
3175+
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
3176+
* struct drm_i915_gem_create_ext_protected_content.
29823177
*/
29833178
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
3179+
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
29843180
__u64 extensions;
29853181
};
29863182

@@ -3038,6 +3234,50 @@ struct drm_i915_gem_create_ext_memory_regions {
30383234
__u64 regions;
30393235
};
30403236

3237+
/**
3238+
* struct drm_i915_gem_create_ext_protected_content - The
3239+
* I915_GEM_CREATE_EXT_PROTECTED_CONTENT extension.
3240+
*
3241+
* If this extension is provided, buffer contents are expected to be protected
3242+
* by PXP encryption and require decryption for scan out and processing. This
3243+
* is only possible on platforms that have PXP enabled; in all other scenarios
3244+
* using this extension will cause the ioctl to fail and return -ENODEV. The
3245+
* flags parameter is reserved for future expansion and must currently be set
3246+
* to zero.
3247+
*
3248+
* The buffer contents are considered invalid after a PXP session teardown.
3249+
*
3250+
* The encryption is guaranteed to be processed correctly only if the object
3251+
* is submitted with a context created using the
3252+
* I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks
3253+
* at submission time on the validity of the objects involved.
3254+
*
3255+
* Below is an example on how to create a protected object:
3256+
*
3257+
* .. code-block:: C
3258+
*
3259+
* struct drm_i915_gem_create_ext_protected_content protected_ext = {
3260+
* .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
3261+
* .flags = 0,
3262+
* };
3263+
* struct drm_i915_gem_create_ext create_ext = {
3264+
* .size = PAGE_SIZE,
3265+
* .extensions = (uintptr_t)&protected_ext,
3266+
* };
3267+
*
3268+
* int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
3269+
* if (err) ...
3270+
*/
3271+
struct drm_i915_gem_create_ext_protected_content {
3272+
/** @base: Extension link. See struct i915_user_extension. */
3273+
struct i915_user_extension base;
3274+
/** @flags: reserved for future usage, currently MBZ */
3275+
__u32 flags;
3276+
};
3277+
3278+
/* ID of the protected content session managed by i915 when PXP is active */
3279+
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
3280+
30413281
#if defined(__cplusplus)
30423282
}
30433283
#endif

tools/include/uapi/linux/prctl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,5 +268,8 @@ struct prctl_mm_map {
268268
# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
269269
# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
270270
# define PR_SCHED_CORE_MAX 4
271+
# define PR_SCHED_CORE_SCOPE_THREAD 0
272+
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
273+
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
271274

272275
#endif /* _LINUX_PRCTL_H */

tools/include/uapi/sound/asound.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,7 @@ typedef int __bitwise snd_ctl_elem_iface_t;
10021002
#define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1)
10031003
#define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
10041004
#define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */
1005-
// (1 << 3) is unused.
1005+
/* (1 << 3) is unused. */
10061006
#define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */
10071007
#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */
10081008
#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)

tools/perf/Documentation/perf-record.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ This option sets the time out limit. The default value is 500 ms.
469469

470470
--switch-events::
471471
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
472-
PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
472+
PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE)
473473
switch events will be enabled automatically, which can be suppressed by
474474
the option --no-switch-events.
475475

0 commit comments

Comments
 (0)