Skip to content

Commit 32d2b5d

Browse files
committed
nv2a: Fix R12 input in paired/multiout ops writing to oPos
Paired MAC+ILU and duplicate output instructions can write to oPos while reading from R12. This leads to a case where xemu's serialized emulation erroneously uses the output of a previous instruction when calculating the value of a later one.

E.g., in

```
/* 0x00000000 0x0080201A 0xC4002868 0x7CB0E800 */
MAD oPos.xyz, R12.xyz, R1.x, C[1].xyz
+ MAD R11.xy, R12.xyz, R1.x, C[1].xyz
```

the value of oPos prior to the first instruction should be used for both MAD calculations.

This could alternatively be fixed by writing oPos to a temp vector and deferring the output vector update until after the token is fully processed.

Fixes #1864
1 parent 90ac1b1 commit 32d2b5d

File tree

1 file changed

+45
-34
lines changed

1 file changed

+45
-34
lines changed

hw/xbox/nv2a/pgraph/glsl/vsh-prog.c

Lines changed: 45 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -296,15 +296,15 @@ static MString *decode_swizzle(const uint32_t *shader_token,
296296

297297
static MString *decode_opcode_input(const uint32_t *shader_token,
298298
VshParameterType param,
299-
VshFieldName neg_field, int reg_num)
299+
VshFieldName neg_field, int reg_num,
300+
bool *uses_r12_latch)
300301
{
301302
/* This function decodes a vertex shader opcode parameter into a string.
302303
* Input A, B or C is controlled via the Param and NEG fieldnames,
303304
* the R-register address for each input is already given by caller. */
304305

305306
MString *ret_str = mstring_new();
306307

307-
308308
if (vsh_get_field(shader_token, neg_field) > 0) {
309309
mstring_append_fmt(ret_str, "-");
310310
}
@@ -315,6 +315,9 @@ static MString *decode_opcode_input(const uint32_t *shader_token,
315315
switch (param) {
316316
case PARAM_R:
317317
snprintf(tmp, sizeof(tmp), "R%d", reg_num);
318+
if (reg_num == 12) {
319+
*uses_r12_latch = true;
320+
}
318321
break;
319322
case PARAM_V:
320323
reg_num = vsh_get_field(shader_token, FLD_V);
@@ -346,12 +349,11 @@ static MString *decode_opcode_input(const uint32_t *shader_token,
346349
return ret_str;
347350
}
348351

349-
static MString *decode_opcode(const uint32_t *shader_token,
350-
VshOutputMux out_mux, uint32_t mask,
351-
const char *opcode, const char *inputs,
352-
MString **suffix)
352+
static void decode_opcode(MString *ret, const uint32_t *shader_token,
353+
VshOutputMux out_mux, uint32_t mask,
354+
const char *opcode, const char *inputs,
355+
MString **suffix)
353356
{
354-
MString *ret = mstring_new();
355357
int reg_num = vsh_get_field(shader_token, FLD_OUT_R);
356358
bool use_temp_var = false;
357359

@@ -427,14 +429,10 @@ static MString *decode_opcode(const uint32_t *shader_token,
427429
opcode, reg_num, mask_str[mask], inputs);
428430
}
429431
}
430-
431-
return ret;
432432
}
433433

434434
static MString *decode_token(const uint32_t *shader_token)
435435
{
436-
MString *ret;
437-
438436
/* See what MAC opcode is written to (if not masked away): */
439437
VshMAC mac = vsh_get_field(shader_token, FLD_MAC);
440438
/* See if a ILU opcode is present too: */
@@ -443,23 +441,28 @@ static MString *decode_token(const uint32_t *shader_token)
443441
return mstring_new();
444442
}
445443

444+
bool uses_r12 = false;
445+
446446
/* Since it's potentially used twice, decode input C once: */
447447
MString *input_c =
448448
decode_opcode_input(shader_token,
449449
vsh_get_field(shader_token, FLD_C_MUX),
450450
FLD_C_NEG,
451451
(vsh_get_field(shader_token, FLD_C_R_HIGH) << 2)
452-
| vsh_get_field(shader_token, FLD_C_R_LOW));
452+
| vsh_get_field(shader_token, FLD_C_R_LOW),
453+
&uses_r12);
453454

454455
MString *mac_suffix = NULL;
456+
MString *inputs_mac = mstring_new();
457+
455458
if (mac != MAC_NOP) {
456-
MString *inputs_mac = mstring_new();
457459
if (mac_opcode_params[mac].A) {
458460
MString *input_a =
459461
decode_opcode_input(shader_token,
460462
vsh_get_field(shader_token, FLD_A_MUX),
461463
FLD_A_NEG,
462-
vsh_get_field(shader_token, FLD_A_R));
464+
vsh_get_field(shader_token, FLD_A_R),
465+
&uses_r12);
463466
mstring_append(inputs_mac, ", ");
464467
mstring_append(inputs_mac, mstring_get_str(input_a));
465468
mstring_unref(input_a);
@@ -469,7 +472,8 @@ static MString *decode_token(const uint32_t *shader_token)
469472
decode_opcode_input(shader_token,
470473
vsh_get_field(shader_token, FLD_B_MUX),
471474
FLD_B_NEG,
472-
vsh_get_field(shader_token, FLD_B_R));
475+
vsh_get_field(shader_token, FLD_B_R),
476+
&uses_r12);
473477
mstring_append(inputs_mac, ", ");
474478
mstring_append(inputs_mac, mstring_get_str(input_b));
475479
mstring_unref(input_b);
@@ -478,36 +482,39 @@ static MString *decode_token(const uint32_t *shader_token)
478482
mstring_append(inputs_mac, ", ");
479483
mstring_append(inputs_mac, mstring_get_str(input_c));
480484
}
485+
}
481486

487+
MString *ret = mstring_new();
488+
if (uses_r12) {
489+
mstring_append(ret, " R12 = oPos;\n");
490+
}
491+
492+
if (mac != MAC_NOP) {
482493
/* Then prepend these inputs with the actual opcode, mask, and input : */
483-
ret = decode_opcode(shader_token,
484-
OMUX_MAC,
485-
vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
486-
mac_opcode[mac],
487-
mstring_get_str(inputs_mac),
488-
&mac_suffix);
494+
decode_opcode(ret,
495+
shader_token,
496+
OMUX_MAC,
497+
vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
498+
mac_opcode[mac],
499+
mstring_get_str(inputs_mac),
500+
&mac_suffix);
489501
mstring_unref(inputs_mac);
490-
} else {
491-
ret = mstring_new();
492502
}
493503

494504
if (ilu != ILU_NOP) {
495505
MString *inputs_c = mstring_from_str(", ");
496506
mstring_append(inputs_c, mstring_get_str(input_c));
497507

498508
/* Append the ILU opcode, mask and (the already determined) input C: */
499-
MString *ilu_op =
500-
decode_opcode(shader_token,
501-
OMUX_ILU,
502-
vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
503-
ilu_opcode[ilu],
504-
mstring_get_str(inputs_c),
505-
NULL);
506-
507-
mstring_append(ret, mstring_get_str(ilu_op));
509+
decode_opcode(ret,
510+
shader_token,
511+
OMUX_ILU,
512+
vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
513+
ilu_opcode[ilu],
514+
mstring_get_str(inputs_c),
515+
NULL);
508516

509517
mstring_unref(inputs_c);
510-
mstring_unref(ilu_op);
511518
}
512519

513520
mstring_unref(input_c);
@@ -536,7 +543,11 @@ static const char* vsh_header =
536543
"vec4 R9 = vec4(0.0,0.0,0.0,0.0);\n"
537544
"vec4 R10 = vec4(0.0,0.0,0.0,0.0);\n"
538545
"vec4 R11 = vec4(0.0,0.0,0.0,0.0);\n"
539-
"#define R12 oPos\n" /* R12 is a mirror of oPos */
546+
547+
/* R12 is a mirror of oPos and is updated on demand to facilitate
548+
* multi-output / MAC+ILU pairs
549+
*/
550+
"vec4 R12 = vec4(0.0,0.0,0.0,0.0);\n"
540551
"\n"
541552

542553
/* Used to emulate concurrency of paired MAC+ILU instructions */

0 commit comments

Comments
 (0)