Skip to content

Commit 8ff499f

Browse files
committed
nv2a: Fix R12 input in paired/multiout ops writing to oPos
Paired MAC+ILU and duplicate output instructions can write to oPos while reading from R12. This leads to a case where xemu's serialized emulation erroneously uses the output of a previous instruction when calculating the value of a later one. E.g., in ``` /* 0x00000000 0x0080201A 0xC4002868 0x7CB0E800 */ MAD oPos.xyz, R12.xyz, R1.x, C[1].xyz + MAD R11.xy, R12.xyz, R1.x, C[1].xyz ``` the value of oPos prior to the first instruction should be used for both MAD calculations. This could alternatively be fixed by writing oPos to a temp vector and deferring the output vector update until after the token is fully processed. Note that we always process output registers writes before temp register writes so the only interesting case should be the oPos/R12 alias. An op that writes to one of its temp inputs will always execute the non-modifying output register write before updating the temp register. [Tests](https://github.com/abaire/nxdk_pgraph_tests/blob/1745a45290a5d607f9582303d6882cbf62f003de/src/tests/vertex_shader_independence_tests.cpp#L125) [HW results](https://abaire.github.io/nxdk_pgraph_tests_golden_results/results/Vertex_shader_independence_tests/index.html) Fixes #1864
1 parent 90ac1b1 commit 8ff499f

File tree

1 file changed

+61
-38
lines changed

1 file changed

+61
-38
lines changed

hw/xbox/nv2a/pgraph/glsl/vsh-prog.c

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ static bool ilu_force_scalar[] = {
209209
false,
210210
};
211211

212+
#define OUTPUT_REG_R12 0
212213
#define OUTPUT_REG_FOG 5
213214

214215
static const char* out_reg_name[] = {
@@ -296,15 +297,15 @@ static MString *decode_swizzle(const uint32_t *shader_token,
296297

297298
static MString *decode_opcode_input(const uint32_t *shader_token,
298299
VshParameterType param,
299-
VshFieldName neg_field, int reg_num)
300+
VshFieldName neg_field, int reg_num,
301+
bool *uses_r12_latch)
300302
{
301303
/* This function decodes a vertex shader opcode parameter into a string.
302304
* Input A, B or C is controlled via the Param and NEG fieldnames,
303305
* the R-register address for each input is already given by caller. */
304306

305307
MString *ret_str = mstring_new();
306308

307-
308309
if (vsh_get_field(shader_token, neg_field) > 0) {
309310
mstring_append_fmt(ret_str, "-");
310311
}
@@ -315,6 +316,9 @@ static MString *decode_opcode_input(const uint32_t *shader_token,
315316
switch (param) {
316317
case PARAM_R:
317318
snprintf(tmp, sizeof(tmp), "R%d", reg_num);
319+
if (reg_num == 12) {
320+
*uses_r12_latch = true;
321+
}
318322
break;
319323
case PARAM_V:
320324
reg_num = vsh_get_field(shader_token, FLD_V);
@@ -346,12 +350,12 @@ static MString *decode_opcode_input(const uint32_t *shader_token,
346350
return ret_str;
347351
}
348352

349-
static MString *decode_opcode(const uint32_t *shader_token,
350-
VshOutputMux out_mux, uint32_t mask,
351-
const char *opcode, const char *inputs,
352-
MString **suffix)
353+
static void decode_opcode(MString *ret, const uint32_t *shader_token,
354+
VshOutputMux out_mux, uint32_t mask,
355+
const char *opcode, const char *inputs,
356+
MString **suffix,
357+
bool *r12_is_stale)
353358
{
354-
MString *ret = mstring_new();
355359
int reg_num = vsh_get_field(shader_token, FLD_OUT_R);
356360
bool use_temp_var = false;
357361

@@ -369,6 +373,12 @@ static MString *decode_opcode(const uint32_t *shader_token,
369373
reg_num = 1;
370374
}
371375

376+
VshOutputType out_type = vsh_get_field(shader_token, FLD_OUT_ORB);
377+
int out_reg = vsh_get_field(shader_token, FLD_OUT_ADDRESS) & 0xF;
378+
if (out_type == OUTPUT_O && out_reg == OUTPUT_REG_R12) {
379+
*r12_is_stale = true;
380+
}
381+
372382
/* See if we must add a muxed opcode too: */
373383
if (vsh_get_field(shader_token, FLD_OUT_MUX) == out_mux
374384
/* Only if it's not masked away: */
@@ -379,13 +389,12 @@ static MString *decode_opcode(const uint32_t *shader_token,
379389
mstring_append(ret, "(");
380390

381391
bool write_fog_register = false;
382-
if (vsh_get_field(shader_token, FLD_OUT_ORB) == OUTPUT_C) {
392+
if (out_type == OUTPUT_C) {
383393
assert(!"TODO: Emulate writeable const registers");
384394
mstring_append_fmt(ret, "c%d",
385395
convert_c_register(vsh_get_field(
386396
shader_token, FLD_OUT_ADDRESS)));
387397
} else {
388-
int out_reg = vsh_get_field(shader_token, FLD_OUT_ADDRESS) & 0xF;
389398
mstring_append(ret,out_reg_name[out_reg]);
390399
write_fog_register = out_reg == OUTPUT_REG_FOG;
391400
}
@@ -427,14 +436,10 @@ static MString *decode_opcode(const uint32_t *shader_token,
427436
opcode, reg_num, mask_str[mask], inputs);
428437
}
429438
}
430-
431-
return ret;
432439
}
433440

434-
static MString *decode_token(const uint32_t *shader_token)
441+
static MString *decode_token(const uint32_t *shader_token, bool *r12_is_stale)
435442
{
436-
MString *ret;
437-
438443
/* See what MAC opcode is written to (if not masked away): */
439444
VshMAC mac = vsh_get_field(shader_token, FLD_MAC);
440445
/* See if a ILU opcode is present too: */
@@ -443,23 +448,29 @@ static MString *decode_token(const uint32_t *shader_token)
443448
return mstring_new();
444449
}
445450

451+
bool uses_r12 = false;
452+
446453
/* Since it's potentially used twice, decode input C once: */
447454
MString *input_c =
448455
decode_opcode_input(shader_token,
449456
vsh_get_field(shader_token, FLD_C_MUX),
450457
FLD_C_NEG,
451458
(vsh_get_field(shader_token, FLD_C_R_HIGH) << 2)
452-
| vsh_get_field(shader_token, FLD_C_R_LOW));
459+
| vsh_get_field(shader_token, FLD_C_R_LOW),
460+
&uses_r12);
453461

454462
MString *mac_suffix = NULL;
463+
MString *inputs_mac = NULL;
464+
455465
if (mac != MAC_NOP) {
456-
MString *inputs_mac = mstring_new();
466+
inputs_mac = mstring_new();
457467
if (mac_opcode_params[mac].A) {
458468
MString *input_a =
459469
decode_opcode_input(shader_token,
460470
vsh_get_field(shader_token, FLD_A_MUX),
461471
FLD_A_NEG,
462-
vsh_get_field(shader_token, FLD_A_R));
472+
vsh_get_field(shader_token, FLD_A_R),
473+
&uses_r12);
463474
mstring_append(inputs_mac, ", ");
464475
mstring_append(inputs_mac, mstring_get_str(input_a));
465476
mstring_unref(input_a);
@@ -469,7 +480,8 @@ static MString *decode_token(const uint32_t *shader_token)
469480
decode_opcode_input(shader_token,
470481
vsh_get_field(shader_token, FLD_B_MUX),
471482
FLD_B_NEG,
472-
vsh_get_field(shader_token, FLD_B_R));
483+
vsh_get_field(shader_token, FLD_B_R),
484+
&uses_r12);
473485
mstring_append(inputs_mac, ", ");
474486
mstring_append(inputs_mac, mstring_get_str(input_b));
475487
mstring_unref(input_b);
@@ -478,36 +490,42 @@ static MString *decode_token(const uint32_t *shader_token)
478490
mstring_append(inputs_mac, ", ");
479491
mstring_append(inputs_mac, mstring_get_str(input_c));
480492
}
493+
}
481494

495+
MString *ret = mstring_new();
496+
if (uses_r12 && *r12_is_stale) {
497+
mstring_append(ret, " R12 = oPos;\n");
498+
*r12_is_stale = false;
499+
}
500+
501+
if (mac != MAC_NOP) {
482502
/* Then prepend these inputs with the actual opcode, mask, and input : */
483-
ret = decode_opcode(shader_token,
484-
OMUX_MAC,
485-
vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
486-
mac_opcode[mac],
487-
mstring_get_str(inputs_mac),
488-
&mac_suffix);
503+
decode_opcode(ret,
504+
shader_token,
505+
OMUX_MAC,
506+
vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
507+
mac_opcode[mac],
508+
mstring_get_str(inputs_mac),
509+
&mac_suffix,
510+
r12_is_stale);
489511
mstring_unref(inputs_mac);
490-
} else {
491-
ret = mstring_new();
492512
}
493513

494514
if (ilu != ILU_NOP) {
495515
MString *inputs_c = mstring_from_str(", ");
496516
mstring_append(inputs_c, mstring_get_str(input_c));
497517

498518
/* Append the ILU opcode, mask and (the already determined) input C: */
499-
MString *ilu_op =
500-
decode_opcode(shader_token,
501-
OMUX_ILU,
502-
vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
503-
ilu_opcode[ilu],
504-
mstring_get_str(inputs_c),
505-
NULL);
506-
507-
mstring_append(ret, mstring_get_str(ilu_op));
519+
decode_opcode(ret,
520+
shader_token,
521+
OMUX_ILU,
522+
vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
523+
ilu_opcode[ilu],
524+
mstring_get_str(inputs_c),
525+
NULL,
526+
r12_is_stale);
508527

509528
mstring_unref(inputs_c);
510-
mstring_unref(ilu_op);
511529
}
512530

513531
mstring_unref(input_c);
@@ -536,7 +554,11 @@ static const char* vsh_header =
536554
"vec4 R9 = vec4(0.0,0.0,0.0,0.0);\n"
537555
"vec4 R10 = vec4(0.0,0.0,0.0,0.0);\n"
538556
"vec4 R11 = vec4(0.0,0.0,0.0,0.0);\n"
539-
"#define R12 oPos\n" /* R12 is a mirror of oPos */
557+
558+
/* R12 is a mirror of oPos and is updated on demand to facilitate
559+
* multi-output / MAC+ILU pairs
560+
*/
561+
"vec4 R12 = vec4(0.0,0.0,0.0,0.0);\n"
540562
"\n"
541563

542564
/* Used to emulate concurrency of paired MAC+ILU instructions */
@@ -730,12 +752,13 @@ void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens,
730752

731753
mstring_append(header, vsh_header);
732754

755+
bool r12_is_stale = true;
733756
bool has_final = false;
734757
int slot;
735758

736759
for (slot=0; slot < length; slot++) {
737760
const uint32_t* cur_token = &tokens[slot * VSH_TOKEN_SIZE];
738-
MString *token_str = decode_token(cur_token);
761+
MString *token_str = decode_token(cur_token, &r12_is_stale);
739762
mstring_append_fmt(body,
740763
" /* Slot %d: 0x%08X 0x%08X 0x%08X 0x%08X */\n"
741764
" %s\n",

0 commit comments

Comments
 (0)