Skip to content

Commit 34ef767

Browse files
committed
tcg/s390x: Add host vector framework
Add registers and function stubs. The functionality is disabled via
squashing s390_facilities[2] to 0.

We must still include results for the mandatory opcodes in
tcg_target_op_def, as all opcodes are checked during tcg init.

Reviewed-by: David Hildenbrand <[email protected]>
Signed-off-by: Richard Henderson <[email protected]>
1 parent eee6251 commit 34ef767

File tree

5 files changed, with 184 additions and 5 deletions.

tcg/s390x/tcg-target-con-set.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,17 @@ C_O0_I1(r)
1313
C_O0_I2(L, L)
1414
C_O0_I2(r, r)
1515
C_O0_I2(r, ri)
16+
C_O0_I2(v, r)
1617
C_O1_I1(r, L)
1718
C_O1_I1(r, r)
19+
C_O1_I1(v, r)
20+
C_O1_I1(v, vr)
1821
C_O1_I2(r, 0, ri)
1922
C_O1_I2(r, 0, rI)
2023
C_O1_I2(r, 0, rJ)
2124
C_O1_I2(r, r, ri)
2225
C_O1_I2(r, rZ, r)
26+
C_O1_I2(v, v, v)
2327
C_O1_I4(r, r, ri, r, 0)
2428
C_O1_I4(r, r, ri, rI, 0)
2529
C_O2_I2(b, a, 0, r)

tcg/s390x/tcg-target-con-str.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
*/
1111
REGS('r', ALL_GENERAL_REGS)
1212
REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
13+
REGS('v', ALL_VECTOR_REGS)
1314
/*
1415
* A (single) even/odd pair for division.
1516
* TODO: Add something to the register allocator to allow

tcg/s390x/tcg-target.c.inc

Lines changed: 134 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#define TCG_CT_CONST_ZERO 0x800
4444

4545
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
46+
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
47+
4648
/*
4749
* For softmmu, we need to avoid conflicts with the first 3
4850
* argument registers to perform the tlb lookup, and to call
@@ -268,8 +270,13 @@ typedef enum S390Opcode {
268270

269271
#ifdef CONFIG_DEBUG_TCG
270272
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
271-
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
272-
"%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15"
273+
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
274+
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
275+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276+
"%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
277+
"%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
278+
"%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
279+
"%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
273280
};
274281
#endif
275282

@@ -295,6 +302,32 @@ static const int tcg_target_reg_alloc_order[] = {
295302
TCG_REG_R4,
296303
TCG_REG_R3,
297304
TCG_REG_R2,
305+
306+
/* V8-V15 are call saved, and omitted. */
307+
TCG_REG_V0,
308+
TCG_REG_V1,
309+
TCG_REG_V2,
310+
TCG_REG_V3,
311+
TCG_REG_V4,
312+
TCG_REG_V5,
313+
TCG_REG_V6,
314+
TCG_REG_V7,
315+
TCG_REG_V16,
316+
TCG_REG_V17,
317+
TCG_REG_V18,
318+
TCG_REG_V19,
319+
TCG_REG_V20,
320+
TCG_REG_V21,
321+
TCG_REG_V22,
322+
TCG_REG_V23,
323+
TCG_REG_V24,
324+
TCG_REG_V25,
325+
TCG_REG_V26,
326+
TCG_REG_V27,
327+
TCG_REG_V28,
328+
TCG_REG_V29,
329+
TCG_REG_V30,
330+
TCG_REG_V31,
298331
};
299332

300333
static const int tcg_target_call_iarg_regs[] = {
@@ -377,7 +410,7 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
377410
#endif
378411

379412
static const tcg_insn_unit *tb_ret_addr;
380-
uint64_t s390_facilities[1];
413+
uint64_t s390_facilities[3];
381414

382415
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
383416
intptr_t value, intptr_t addend)
@@ -2293,6 +2326,42 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
22932326
}
22942327
}
22952328

2329+
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2330+
TCGReg dst, TCGReg src)
2331+
{
2332+
g_assert_not_reached();
2333+
}
2334+
2335+
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2336+
TCGReg dst, TCGReg base, intptr_t offset)
2337+
{
2338+
g_assert_not_reached();
2339+
}
2340+
2341+
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2342+
TCGReg dst, int64_t val)
2343+
{
2344+
g_assert_not_reached();
2345+
}
2346+
2347+
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2348+
unsigned vecl, unsigned vece,
2349+
const TCGArg *args, const int *const_args)
2350+
{
2351+
g_assert_not_reached();
2352+
}
2353+
2354+
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2355+
{
2356+
return 0;
2357+
}
2358+
2359+
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2360+
TCGArg a0, ...)
2361+
{
2362+
g_assert_not_reached();
2363+
}
2364+
22962365
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
22972366
{
22982367
switch (op) {
@@ -2433,11 +2502,34 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
24332502
? C_O2_I4(r, r, 0, 1, rA, r)
24342503
: C_O2_I4(r, r, 0, 1, r, r));
24352504

2505+
case INDEX_op_st_vec:
2506+
return C_O0_I2(v, r);
2507+
case INDEX_op_ld_vec:
2508+
case INDEX_op_dupm_vec:
2509+
return C_O1_I1(v, r);
2510+
case INDEX_op_dup_vec:
2511+
return C_O1_I1(v, vr);
2512+
case INDEX_op_add_vec:
2513+
case INDEX_op_sub_vec:
2514+
case INDEX_op_and_vec:
2515+
case INDEX_op_or_vec:
2516+
case INDEX_op_xor_vec:
2517+
case INDEX_op_cmp_vec:
2518+
return C_O1_I2(v, v, v);
2519+
24362520
default:
24372521
g_assert_not_reached();
24382522
}
24392523
}
24402524

2525+
/*
2526+
* Mainline glibc added HWCAP_S390_VX before it was kernel abi.
2527+
* Some distros have fixed this up locally, others have not.
2528+
*/
2529+
#ifndef HWCAP_S390_VXRS
2530+
#define HWCAP_S390_VXRS 2048
2531+
#endif
2532+
24412533
static void query_s390_facilities(void)
24422534
{
24432535
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
@@ -2452,6 +2544,16 @@ static void query_s390_facilities(void)
24522544
asm volatile(".word 0xb2b0,0x1000"
24532545
: "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
24542546
}
2547+
2548+
/*
2549+
* Use of vector registers requires os support beyond the facility bit.
2550+
* If the kernel does not advertise support, disable the facility bits.
2551+
* There is nothing else we currently care about in the 3rd word, so
2552+
* disable VECTOR with one store.
2553+
*/
2554+
if (1 || !(hwcap & HWCAP_S390_VXRS)) {
2555+
s390_facilities[2] = 0;
2556+
}
24552557
}
24562558

24572559
static void tcg_target_init(TCGContext *s)
@@ -2460,6 +2562,10 @@ static void tcg_target_init(TCGContext *s)
24602562

24612563
tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
24622564
tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
2565+
if (HAVE_FACILITY(VECTOR)) {
2566+
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2567+
tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2568+
}
24632569

24642570
tcg_target_call_clobber_regs = 0;
24652571
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
@@ -2474,6 +2580,31 @@ static void tcg_target_init(TCGContext *s)
24742580
/* The return register can be considered call-clobbered. */
24752581
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
24762582

2583+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
2584+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
2585+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
2586+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
2587+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
2588+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
2589+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
2590+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
2591+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
2592+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
2593+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
2594+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
2595+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
2596+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
2597+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
2598+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
2599+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
2600+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
2601+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
2602+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
2603+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
2604+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
2605+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
2606+
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
2607+
24772608
s->reserved_regs = 0;
24782609
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
24792610
/* XXX many insns can't be used with R0, so we better avoid it for now */

tcg/s390x/tcg-target.h

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,20 @@ typedef enum TCGReg {
3737
TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
3838
TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
3939

40+
TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
41+
TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
42+
TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
43+
TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
44+
TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
45+
TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
46+
TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
47+
TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
48+
4049
TCG_AREG0 = TCG_REG_R10,
4150
TCG_REG_CALL_STACK = TCG_REG_R15
4251
} TCGReg;
4352

44-
#define TCG_TARGET_NB_REGS 16
53+
#define TCG_TARGET_NB_REGS 64
4554

4655
/* A list of relevant facilities used by this translator. Some of these
4756
are required for proper operation, and these are checked at startup. */
@@ -54,8 +63,9 @@ typedef enum TCGReg {
5463
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
5564
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
5665
#define FACILITY_LOAD_ON_COND2 53
66+
#define FACILITY_VECTOR 129
5767

58-
extern uint64_t s390_facilities[1];
68+
extern uint64_t s390_facilities[3];
5969

6070
#define HAVE_FACILITY(X) \
6171
((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
@@ -128,6 +138,27 @@ extern uint64_t s390_facilities[1];
128138
#define TCG_TARGET_HAS_muluh_i64 0
129139
#define TCG_TARGET_HAS_mulsh_i64 0
130140

141+
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
142+
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
143+
#define TCG_TARGET_HAS_v256 0
144+
145+
#define TCG_TARGET_HAS_andc_vec 0
146+
#define TCG_TARGET_HAS_orc_vec 0
147+
#define TCG_TARGET_HAS_not_vec 0
148+
#define TCG_TARGET_HAS_neg_vec 0
149+
#define TCG_TARGET_HAS_abs_vec 0
150+
#define TCG_TARGET_HAS_roti_vec 0
151+
#define TCG_TARGET_HAS_rots_vec 0
152+
#define TCG_TARGET_HAS_rotv_vec 0
153+
#define TCG_TARGET_HAS_shi_vec 0
154+
#define TCG_TARGET_HAS_shs_vec 0
155+
#define TCG_TARGET_HAS_shv_vec 0
156+
#define TCG_TARGET_HAS_mul_vec 0
157+
#define TCG_TARGET_HAS_sat_vec 0
158+
#define TCG_TARGET_HAS_minmax_vec 0
159+
#define TCG_TARGET_HAS_bitsel_vec 0
160+
#define TCG_TARGET_HAS_cmpsel_vec 0
161+
131162
/* used for function call generation */
132163
#define TCG_TARGET_STACK_ALIGN 8
133164
#define TCG_TARGET_CALL_STACK_OFFSET 160

tcg/s390x/tcg-target.opc.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/*
2+
* Copyright (c) 2021 Linaro
3+
*
4+
* This work is licensed under the terms of the GNU GPL, version 2 or
5+
* (at your option) any later version.
6+
*
7+
* See the COPYING file in the top-level directory for details.
8+
*
9+
* Target-specific opcodes for host vector expansion. These will be
10+
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
11+
* consider these to be UNSPEC with names.
12+
*/

0 commit comments

Comments
 (0)