Skip to content

Commit edc15c1

Browse files
committed
nv2a: Fix R12 input in paired/multiout ops writing to oPos
Paired MAC+ILU and duplicate output instructions can write to oPos while reading from R12. This leads to a case where xemu's serialized emulation erroneously uses the output of a previous instruction when calculating the value of a later one. E.g., in ``` /* 0x00000000 0x0080201A 0xC4002868 0x7CB0E800 */ MAD oPos.xyz, R12.xyz, R1.x, C[1].xyz + MAD R11.xy, R12.xyz, R1.x, C[1].xyz ``` the value of oPos prior to the first instruction should be used for both MAD calculations. This could alternatively be fixed by writing oPos to a temp vector and deferring the output vector update until after the token is fully processed. Fixes #1864
1 parent 90ac1b1 commit edc15c1

File tree

1 file changed

+61
-38
lines changed

1 file changed

+61
-38
lines changed

hw/xbox/nv2a/pgraph/glsl/vsh-prog.c

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ static bool ilu_force_scalar[] = {
209209
false,
210210
};
211211

212+
#define OUTPUT_REG_R12 0
212213
#define OUTPUT_REG_FOG 5
213214

214215
static const char* out_reg_name[] = {
@@ -296,15 +297,15 @@ static MString *decode_swizzle(const uint32_t *shader_token,
296297

297298
static MString *decode_opcode_input(const uint32_t *shader_token,
298299
VshParameterType param,
299-
VshFieldName neg_field, int reg_num)
300+
VshFieldName neg_field, int reg_num,
301+
bool *uses_r12_latch)
300302
{
301303
/* This function decodes a vertex shader opcode parameter into a string.
302304
* Input A, B or C is controlled via the Param and NEG fieldnames,
303305
* the R-register address for each input is already given by caller. */
304306

305307
MString *ret_str = mstring_new();
306308

307-
308309
if (vsh_get_field(shader_token, neg_field) > 0) {
309310
mstring_append_fmt(ret_str, "-");
310311
}
@@ -315,6 +316,9 @@ static MString *decode_opcode_input(const uint32_t *shader_token,
315316
switch (param) {
316317
case PARAM_R:
317318
snprintf(tmp, sizeof(tmp), "R%d", reg_num);
319+
if (reg_num == 12) {
320+
*uses_r12_latch = true;
321+
}
318322
break;
319323
case PARAM_V:
320324
reg_num = vsh_get_field(shader_token, FLD_V);
@@ -346,12 +350,12 @@ static MString *decode_opcode_input(const uint32_t *shader_token,
346350
return ret_str;
347351
}
348352

349-
static MString *decode_opcode(const uint32_t *shader_token,
350-
VshOutputMux out_mux, uint32_t mask,
351-
const char *opcode, const char *inputs,
352-
MString **suffix)
353+
static void decode_opcode(MString *ret, const uint32_t *shader_token,
354+
VshOutputMux out_mux, uint32_t mask,
355+
const char *opcode, const char *inputs,
356+
MString **suffix,
357+
bool *r12_is_stale)
353358
{
354-
MString *ret = mstring_new();
355359
int reg_num = vsh_get_field(shader_token, FLD_OUT_R);
356360
bool use_temp_var = false;
357361

@@ -369,6 +373,12 @@ static MString *decode_opcode(const uint32_t *shader_token,
369373
reg_num = 1;
370374
}
371375

376+
VshOutputType out_type = vsh_get_field(shader_token, FLD_OUT_ORB);
377+
int out_reg = vsh_get_field(shader_token, FLD_OUT_ADDRESS) & 0xF;
378+
if (out_type == OUTPUT_O && out_reg == OUTPUT_REG_R12) {
379+
*r12_is_stale = true;
380+
}
381+
372382
/* See if we must add a muxed opcode too: */
373383
if (vsh_get_field(shader_token, FLD_OUT_MUX) == out_mux
374384
/* Only if it's not masked away: */
@@ -379,13 +389,12 @@ static MString *decode_opcode(const uint32_t *shader_token,
379389
mstring_append(ret, "(");
380390

381391
bool write_fog_register = false;
382-
if (vsh_get_field(shader_token, FLD_OUT_ORB) == OUTPUT_C) {
392+
if (out_type == OUTPUT_C) {
383393
assert(!"TODO: Emulate writeable const registers");
384394
mstring_append_fmt(ret, "c%d",
385395
convert_c_register(vsh_get_field(
386396
shader_token, FLD_OUT_ADDRESS)));
387397
} else {
388-
int out_reg = vsh_get_field(shader_token, FLD_OUT_ADDRESS) & 0xF;
389398
mstring_append(ret,out_reg_name[out_reg]);
390399
write_fog_register = out_reg == OUTPUT_REG_FOG;
391400
}
@@ -427,14 +436,10 @@ static MString *decode_opcode(const uint32_t *shader_token,
427436
opcode, reg_num, mask_str[mask], inputs);
428437
}
429438
}
430-
431-
return ret;
432439
}
433440

434-
static MString *decode_token(const uint32_t *shader_token)
441+
static MString *decode_token(const uint32_t *shader_token, bool *r12_is_stale)
435442
{
436-
MString *ret;
437-
438443
/* See what MAC opcode is written to (if not masked away): */
439444
VshMAC mac = vsh_get_field(shader_token, FLD_MAC);
440445
/* See if a ILU opcode is present too: */
@@ -443,23 +448,29 @@ static MString *decode_token(const uint32_t *shader_token)
443448
return mstring_new();
444449
}
445450

451+
bool uses_r12 = false;
452+
446453
/* Since it's potentially used twice, decode input C once: */
447454
MString *input_c =
448455
decode_opcode_input(shader_token,
449456
vsh_get_field(shader_token, FLD_C_MUX),
450457
FLD_C_NEG,
451458
(vsh_get_field(shader_token, FLD_C_R_HIGH) << 2)
452-
| vsh_get_field(shader_token, FLD_C_R_LOW));
459+
| vsh_get_field(shader_token, FLD_C_R_LOW),
460+
&uses_r12);
453461

454462
MString *mac_suffix = NULL;
463+
MString *inputs_mac = NULL;
464+
455465
if (mac != MAC_NOP) {
456-
MString *inputs_mac = mstring_new();
466+
inputs_mac = mstring_new();
457467
if (mac_opcode_params[mac].A) {
458468
MString *input_a =
459469
decode_opcode_input(shader_token,
460470
vsh_get_field(shader_token, FLD_A_MUX),
461471
FLD_A_NEG,
462-
vsh_get_field(shader_token, FLD_A_R));
472+
vsh_get_field(shader_token, FLD_A_R),
473+
&uses_r12);
463474
mstring_append(inputs_mac, ", ");
464475
mstring_append(inputs_mac, mstring_get_str(input_a));
465476
mstring_unref(input_a);
@@ -469,7 +480,8 @@ static MString *decode_token(const uint32_t *shader_token)
469480
decode_opcode_input(shader_token,
470481
vsh_get_field(shader_token, FLD_B_MUX),
471482
FLD_B_NEG,
472-
vsh_get_field(shader_token, FLD_B_R));
483+
vsh_get_field(shader_token, FLD_B_R),
484+
&uses_r12);
473485
mstring_append(inputs_mac, ", ");
474486
mstring_append(inputs_mac, mstring_get_str(input_b));
475487
mstring_unref(input_b);
@@ -478,36 +490,42 @@ static MString *decode_token(const uint32_t *shader_token)
478490
mstring_append(inputs_mac, ", ");
479491
mstring_append(inputs_mac, mstring_get_str(input_c));
480492
}
493+
}
481494

495+
MString *ret = mstring_new();
496+
if (uses_r12 && *r12_is_stale) {
497+
mstring_append(ret, " R12 = oPos;\n");
498+
*r12_is_stale = false;
499+
}
500+
501+
if (mac != MAC_NOP) {
482502
/* Then prepend these inputs with the actual opcode, mask, and input : */
483-
ret = decode_opcode(shader_token,
484-
OMUX_MAC,
485-
vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
486-
mac_opcode[mac],
487-
mstring_get_str(inputs_mac),
488-
&mac_suffix);
503+
decode_opcode(ret,
504+
shader_token,
505+
OMUX_MAC,
506+
vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
507+
mac_opcode[mac],
508+
mstring_get_str(inputs_mac),
509+
&mac_suffix,
510+
r12_is_stale);
489511
mstring_unref(inputs_mac);
490-
} else {
491-
ret = mstring_new();
492512
}
493513

494514
if (ilu != ILU_NOP) {
495515
MString *inputs_c = mstring_from_str(", ");
496516
mstring_append(inputs_c, mstring_get_str(input_c));
497517

498518
/* Append the ILU opcode, mask and (the already determined) input C: */
499-
MString *ilu_op =
500-
decode_opcode(shader_token,
501-
OMUX_ILU,
502-
vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
503-
ilu_opcode[ilu],
504-
mstring_get_str(inputs_c),
505-
NULL);
506-
507-
mstring_append(ret, mstring_get_str(ilu_op));
519+
decode_opcode(ret,
520+
shader_token,
521+
OMUX_ILU,
522+
vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
523+
ilu_opcode[ilu],
524+
mstring_get_str(inputs_c),
525+
NULL,
526+
r12_is_stale);
508527

509528
mstring_unref(inputs_c);
510-
mstring_unref(ilu_op);
511529
}
512530

513531
mstring_unref(input_c);
@@ -536,7 +554,11 @@ static const char* vsh_header =
536554
"vec4 R9 = vec4(0.0,0.0,0.0,0.0);\n"
537555
"vec4 R10 = vec4(0.0,0.0,0.0,0.0);\n"
538556
"vec4 R11 = vec4(0.0,0.0,0.0,0.0);\n"
539-
"#define R12 oPos\n" /* R12 is a mirror of oPos */
557+
558+
/* R12 is a mirror of oPos and is updated on demand to facilitate
559+
* multi-output / MAC+ILU pairs
560+
*/
561+
"vec4 R12 = vec4(0.0,0.0,0.0,0.0);\n"
540562
"\n"
541563

542564
/* Used to emulate concurrency of paired MAC+ILU instructions */
@@ -730,12 +752,13 @@ void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens,
730752

731753
mstring_append(header, vsh_header);
732754

755+
bool r12_is_stale = true;
733756
bool has_final = false;
734757
int slot;
735758

736759
for (slot=0; slot < length; slot++) {
737760
const uint32_t* cur_token = &tokens[slot * VSH_TOKEN_SIZE];
738-
MString *token_str = decode_token(cur_token);
761+
MString *token_str = decode_token(cur_token, &r12_is_stale);
739762
mstring_append_fmt(body,
740763
" /* Slot %d: 0x%08X 0x%08X 0x%08X 0x%08X */\n"
741764
" %s\n",

0 commit comments

Comments
 (0)