@@ -425,6 +425,120 @@ namespace lsp
425
425
426
426
#undef EFF_HSLA_LIGHT_CORE
427
427
428
+ #define EFF_HSLA_ALPHA_CORE /* Builds two 4-register groups {v0..v3} and {v4..v7} = {h, s, l, V} from inputs in v0 and v1 */ \
429
+ /* in: v0 = v[0] (first vector of input values) */ \
430
+ /* in: v1 = v[1] (second vector of input values) */ \
431
+ /* in: v8 = h (copied to v0 and v4) */ \
432
+ /* in: v9 = s (copied to v1 and v5) */ \
433
+ /* in: v10 = l (copied to v2 and v6) */ \
434
+ /* v11 = a (not referenced by this macro) */ \
435
+ /* in: v14 = 0 */ \
436
+ /* in: v15 = 1 */ \
437
+ __ASM_EMIT (" fsub v2.4s, v15.4s, v0.4s" ) /* v2 = 1 - v */ \
438
+ __ASM_EMIT (" fsub v6.4s, v15.4s, v1.4s" ) /* v6 = 1 - v (second vector) */ \
439
+ __ASM_EMIT (" fcmgt v4.4s, v14.4s, v0.4s" ) /* v4 = 0 > v */ \
440
+ __ASM_EMIT (" fcmgt v5.4s, v14.4s, v1.4s" ) /* v5 = 0 > v (second vector) */ \
441
+ __ASM_EMIT (" fadd v3.4s, v0.4s, v15.4s" ) /* v3 = v + 1 */ \
442
+ __ASM_EMIT (" fadd v7.4s, v1.4s, v15.4s" ) /* v7 = v + 1 (second vector) */ \
443
+ __ASM_EMIT (" bif v3.16b, v2.16b, v4.16b" ) /* v3 = V = (v+1)&[0>v] | (1-v)&[0<=v] */ \
444
+ __ASM_EMIT (" bif v7.16b, v6.16b, v5.16b" ) /* v7 = V for the second vector */ \
445
+ __ASM_EMIT (" mov v0.16b, v8.16b" ) /* v0 = h (input values are no longer needed) */ \
446
+ __ASM_EMIT (" mov v1.16b, v9.16b" ) /* v1 = s */ \
447
+ __ASM_EMIT (" mov v2.16b, v10.16b" ) /* v2 = l; v3 already holds V */ \
448
+ __ASM_EMIT (" mov v4.16b, v8.16b" ) /* v4 = h (overwrites the comparison mask) */ \
449
+ __ASM_EMIT (" mov v5.16b, v9.16b" ) /* v5 = s */ \
450
+ __ASM_EMIT (" mov v6.16b, v10.16b" ) /* v6 = l; v7 already holds V */
451
+
452
+ /*
453
+ value = v[i];
454
+ value = (0.0f > value) ? 1.0f + value : 1.0f - value;
455
+
456
+ dst[0] = eff->h;
457
+ dst[1] = eff->s;
458
+ dst[2] = eff->l;
459
+ dst[3] = value; // Fill alpha channel
460
+ */
461
+
462
+ IF_ARCH_AARCH64
463
+ (
464
+ static const float EFF_HSLA_ALPHA_XC[] __lsp_aligned16 = /* constants that eff_hsla_alpha loads into v14/v15 with one ldp */
465
+ {
466
+ LSP_DSP_VEC4 (0 .0f ), /* 0.0 -> v14; LSP_DSP_VEC4 presumably repeats the value for all 4 lanes - confirm against its definition */
467
+ LSP_DSP_VEC4 (1 .0f ) /* 1.0 -> v15 */
468
+ };
469
+ )
470
+
471
/*
 * Fills dst with interleaved 4-float records, one record per input value:
 *   { h, s, l, V }  where  V = (0 > v[i]) ? 1 + v[i] : 1 - v[i].
 *
 * dst   - output buffer; 4 floats are stored for every input value
 * v     - input values; count floats are read
 * eff   - effect settings; four consecutive floats are read from its start
 *         (presumably h, s, l, a as the register comments state - confirm
 *         against the struct definition, which is not visible here)
 * count - number of input values to process
 *
 * Inputs are processed 8 at a time; the remaining 1..7 values are handled by
 * testing the 4, 2 and 1 bits of the leftover count.
 */
+ void eff_hsla_alpha (float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count)
472
+ {
473
+ ARCH_AARCH64_ASM
474
+ (
475
+ __ASM_EMIT (" ld4r {v8.4s, v9.4s, v10.4s, v11.4s}, [%[eff]]" ) /* v8 = h, v9 = s, v10 = l, v11 = a (each replicated to all lanes) */
476
+ __ASM_EMIT (" ldp q14, q15, [%[XC]]" ) /* v14 = 0.0, v15 = 1.0 */
477
+
478
+ // -----------------------------------------------------------------
479
+ // 8x blocks: 8 input values -> 8 HSLA records per iteration
480
+ __ASM_EMIT (" subs %[count], %[count], #8" )
481
+ __ASM_EMIT (" b.lo 2f" ) /* fewer than 8 values: go straight to the tail */
482
+ __ASM_EMIT (" 1:" )
483
+ __ASM_EMIT (" ldp q0, q1, [%[src]]" ) /* v0 = v[0], v1 = v[1] */
484
+ EFF_HSLA_ALPHA_CORE
485
+ __ASM_EMIT (" subs %[count], %[count], #8" ) /* flags are consumed by b.hs below; st4/add leave them untouched */
486
+ __ASM_EMIT (" st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]" ) /* interleaved store: 4 records from the first vector */
487
+ __ASM_EMIT (" add %[dst], %[dst], 0x40" )
488
+ __ASM_EMIT (" add %[src], %[src], 0x20" )
489
+ __ASM_EMIT (" st4 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[dst]]" ) /* 4 records from the second vector */
490
+ __ASM_EMIT (" add %[dst], %[dst], 0x40" )
491
+ __ASM_EMIT (" b.hs 1b" ) /* loop while at least 8 values remained before the subs */
492
+
493
+ // -----------------------------------------------------------------
494
+ // 1x-7x tail: loads and stores mirror each other bit by bit (4, 2, 1)
495
+ __ASM_EMIT (" 2:" )
496
+ __ASM_EMIT (" adds %[count], %[count], #8" ) /* undo the last subtraction: count = remaining values (0..7) */
497
+ __ASM_EMIT (" b.ls 14f" ) /* nothing left */
498
+ __ASM_EMIT (" tst %[count], #4" )
499
+ __ASM_EMIT (" b.eq 4f" )
500
+ __ASM_EMIT (" ldr q0, [%[src]]" ) /* 4 values -> v0 */
501
+ __ASM_EMIT (" add %[src], %[src], 0x10" )
502
+ __ASM_EMIT (" 4:" )
503
+ __ASM_EMIT (" tst %[count], #2" )
504
+ __ASM_EMIT (" b.eq 6f" )
505
+ __ASM_EMIT (" ld1 {v1.2s}, [%[src]]" ) /* 2 values -> lanes 0..1 of v1 */
506
+ __ASM_EMIT (" add %[src], %[src], 0x08" )
507
+ __ASM_EMIT (" 6:" )
508
+ __ASM_EMIT (" tst %[count], #1" )
509
+ __ASM_EMIT (" b.eq 8f" )
510
+ __ASM_EMIT (" ld1 {v1.s}[2], [%[src]]" ) /* 1 value -> lane 2 of v1 */
511
+ __ASM_EMIT (" 8:" )
512
+ EFF_HSLA_ALPHA_CORE /* lanes that were not loaded are computed too, but never stored below */
513
+ __ASM_EMIT (" tst %[count], #4" )
514
+ __ASM_EMIT (" b.eq 10f" )
515
+ __ASM_EMIT (" st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]" ) /* 4 records from v0's inputs */
516
+ __ASM_EMIT (" add %[dst], %[dst], 0x40" )
517
+ __ASM_EMIT (" 10:" )
518
+ __ASM_EMIT (" tst %[count], #2" )
519
+ __ASM_EMIT (" b.eq 12f" )
520
+ __ASM_EMIT (" st4 {v4.2s, v5.2s, v6.2s, v7.2s}, [%[dst]]" ) /* 2 records from lanes 0..1 */
521
+ __ASM_EMIT (" add %[dst], %[dst], 0x20" )
522
+ __ASM_EMIT (" 12:" )
523
+ __ASM_EMIT (" tst %[count], #1" )
524
+ __ASM_EMIT (" b.eq 14f" )
525
+ __ASM_EMIT (" st4 {v4.s, v5.s, v6.s, v7.s}[2], [%[dst]]" ) /* 1 record from lane 2 */
526
+ // End
527
+ __ASM_EMIT (" 14:" )
528
+
529
+ : [dst] " +r" (dst), [src] " +r" (v), [count] " +r" (count) /* read-write: pointers and counter are advanced in place */
530
+ : [eff] " r" (eff), /* read-only inputs */
531
+ [XC] " r" (&EFF_HSLA_ALPHA_XC[0 ])
532
+ : " cc" , " memory" ,
533
+ " v0" , " v1" , " v2" , " v3" ,
534
+ " v4" , " v5" , " v6" , " v7" ,
535
+ " v8" , " v9" , " v10" , " v11" ,
536
+ " v12" , " v13" , " v14" , " v15" /* v12 and v13 are listed although the code above does not reference them */
537
+ );
538
+ }
539
+
540
+ #undef EFF_HSLA_ALPHA_CORE /* helper macro is used only by eff_hsla_alpha; drop it so it does not leak further */
541
+
428
542
} /* namespace asimd */
429
543
} /* namespace lsp */
430
544
0 commit comments