|
| 1 | +// SPDX-License-Identifier: MIT |
| 2 | +/* |
| 3 | + * Copyright © 2020 Intel Corporation |
| 4 | + */ |
| 5 | + |
| 6 | +/* |
| 7 | + * Kernel for PAVP buffer clear. |
| 8 | + * |
| 9 | + * 1. Clear all 64 GRF registers assigned to the kernel with designated value; |
| 10 | + * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears |
| 11 | + * 512 bytes of Render Cache. |
| 12 | + */ |
| 13 | + |
| 14 | +/* Store designated "clear GRF" value: f0.1 = Clear Word (low word of DW 1.1) */ |
| 15 | +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; |
| 16 | + |
| 17 | +/** |
| 18 | + * Curbe Format |
| 19 | + * |
| 20 | + * DW 1.0 - Block Offset to write Render Cache |
| 21 | + * DW 1.1 [15:0] - Clear Word |
| 22 | + * DW 1.2 - Delay iterations |
| 23 | + * DW 1.3 - Enable Instrumentation (only for debug) |
| 24 | + * DW 1.4 - Rsvd (intended for context ID) |
| 25 | + * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount |
| 26 | + * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) |
| 27 | + * DW 1.7 - Rsvd MBZ (intended for Total Thread Count) |
| 28 | + * |
| 29 | + * Binding Table |
| 30 | + * |
| 31 | + * BTI 0: 2D Surface to help clear L3 (Render/Data Cache) |
| 32 | + * BTI 1: Wait/Instrumentation Buffer |
| 33 | + * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT) |
| 34 | + * Expected to be initialized to 0 by driver/another kernel |
| 35 | + * Layout: |
| 36 | + * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS] |
| 37 | + * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N |
| 38 | + */ |
| 39 | +add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1), i.e. DW 1.2 (Delay iterations) + 1 */ |
| 40 | +cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N }; /* f0.0 is set when DW 1.3 (Enable Instrumentation) is zero */ |
| 41 | +(+f0.0) jmpi(1) 352D { align1 WE_all 1N }; /* Instrumentation disabled: skip 352 bytes ahead, presumably the 22 instructions (16 bytes each) of the instrumentation section */ |
| 42 | + |
| 43 | +/** |
| 44 | + * State Register has info on where this thread is running |
| 45 | + * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID |
| 46 | + * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID |
| 47 | + */ |
| 48 | +mov(8) g3<1>UD 0x00000000UD { align1 1Q }; /* Zero g3; its dwords are used as scratch for the ID fields below */ |
| 49 | +shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N }; /* Bring sr0.0 bit 12 down to bit 0 */ |
| 50 | +and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3.0 = HSID (sr0.0 bit 12) */ |
| 51 | +shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N }; /* Bring sr0.0 bits [14:13] down to bits [1:0] */ |
| 52 | +and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 = SliceID (sr0.0 bits [14:13]) */ |
| 53 | +mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N }; /* g3.5 = SliceID * SubSlicePerSliceCount (low word of DW 1.5) */ |
| 54 | +add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */ |
| 55 | +shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N }; /* Bring sr0.0 bits [11:8] down to bits [3:0] */ |
| 56 | +and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */ |
| 57 | +mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N }; /* g3.4 = g3.0 * 16 (16 rows per sub-slice, see buffer layout) */ |
| 58 | +add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */ |
| 59 | + |
| 60 | +mov(8) g5<1>UD 0x00000000UD { align1 1Q }; /* Zero g5: it receives the value read back and then carries the write data */ |
| 61 | +and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N }; /* g3.3 = ThreadSlotID (sr0.0 bits [2:0]) */ |
| 62 | +mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N }; /* g3.3 = ThreadSlotID * 4: byte offset of this thread's DW column in the row */ |
| 63 | + |
| 64 | +mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */ |
| 65 | +mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block X offset: thread column (bytes) */ |
| 66 | +mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block Y offset: EU row */ |
| 67 | +mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */ |
| 68 | +and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N }; /* NOTE(review): source subregister 3 is typed UW while the destination is UD, so the two sides may address different bytes - confirm this is intended */ |
| 69 | + |
| 70 | +/* Media block read to fetch current value at specified location in instrumentation buffer (header g4, reply in g5) */ |
| 71 | +sendc(8) g5<1>UD g4<8,8,1>F 0x02190001 |
| 72 | + |
| 73 | + render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q }; |
| 74 | +add(1) g5<1>D g5<0,1,0>D 1D { align1 1N }; /* Increment the fetched counter */ |
| 75 | + |
| 76 | +/* Media block write for updated value at specified location in instrumentation buffer (mlen 2: header g4 followed by data g5) */ |
| 77 | +sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001 |
| 78 | + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q }; /* NOTE(review): destination is g5 although rlen is 0 - confirm null was not intended */ |
| 79 | + |
| 80 | +/* Delay thread for specified parameter: spin until the loop count held in g1.2 (UD) reaches zero */ |
| 81 | +add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N }; /* Decrement the count; f0.0 is set while the result is non-zero */ |
| 82 | +(+f0.0) jmpi(1) -32D { align1 WE_all 1N }; /* Count not yet zero: jump back 32 bytes, presumably to the decrement above (two 16-byte instructions) */ |
| 83 | + |
| 84 | +/* Store designated "clear GRF" value: f0.1 = Clear Word (low word of DW 1.1), read later as the fill value of the GRF clear loop */ |
| 85 | +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; |
| 86 | + |
| 87 | +/* Initialize looping parameters for the GRF clear loop at the end of the kernel */ |
| 88 | +mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 (indirect address used by g[a0] in the clear loop) */ |
| 89 | +mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count: 127 iterations; each iteration stores 16 words through g[a0] (presumably one 32-byte GRF) */ |
| 90 | + |
| 91 | +/* Write 32x16 all "0" block, issued as two 16x16 media block writes to surface 0 */ |
| 92 | +mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q }; /* g2: message header, initialized from g0 */ |
| 93 | +mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q }; /* g127: copy of g0, used as the payload of the final EOT message */ |
| 94 | +mov(2) g2<1>UD g1<2,2,1>UW { align1 1N }; /* g2.0 = DW 1.0[15:0], g2.1 = DW 1.0[31:16] (Block Offset; presumably X in the low word, Y in the high word) */ |
| 95 | +mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */ |
| 96 | +and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N }; /* Clears bit 4 of the result. NOTE(review): source subregister 3 is typed UW while the destination is UD - confirm this is intended */ |
| 97 | +mov(16) g3<1>UD 0x00000000UD { align1 1H }; /* Zero the payload registers g3..g10 (the all-zero block data) */ |
| 98 | +mov(16) g4<1>UD 0x00000000UD { align1 1H }; |
| 99 | +mov(16) g5<1>UD 0x00000000UD { align1 1H }; |
| 100 | +mov(16) g6<1>UD 0x00000000UD { align1 1H }; |
| 101 | +mov(16) g7<1>UD 0x00000000UD { align1 1H }; |
| 102 | +mov(16) g8<1>UD 0x00000000UD { align1 1H }; |
| 103 | +mov(16) g9<1>UD 0x00000000UD { align1 1H }; |
| 104 | +mov(16) g10<1>UD 0x00000000UD { align1 1H }; |
| 105 | +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 |
| 106 | + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; /* First 16x16 block: mlen 9 = header g2 + 8 data registers */ |
| 107 | +add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N }; /* g2.0 = DW 1.0[15:0] + 16: move the block offset 16 to the right for the second half */ |
| 108 | +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 |
| 109 | + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; /* Second 16x16 block */ |
| 110 | + |
| 111 | +/* Now, clear the GRF registers: a0.4 iterations (initialized to 127), each filling 16 words at g[a0] with the clear word. NOTE(review): the file header mentions 64 GRFs - confirm which count is intended */ |
| 112 | +add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N }; /* Decrement the loop count; f0.0 is set while iterations remain */ |
| 113 | +mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H }; /* Fill 16 words at the indirect address with the clear word held in f0.1 */ |
| 114 | +add(1) a0<1>D a0<0,1,0>D 32D { align1 1N }; /* Advance the indirect address by 32 (no condition modifier, so f0.0 is left untouched) */ |
| 115 | +(+f0.0) jmpi(1) -64D { align1 WE_all 1N }; /* Iterations remain: jump back 64 bytes, presumably to the decrement above (four 16-byte instructions) */ |
| 116 | + |
| 117 | +/* Terminate the thread: EOT message with g127 (the copy of g0 saved earlier) as payload */ |
| 118 | +sendc(8) null<1>UD g127<8,8,1>F 0x82000010 |
| 119 | + thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT }; |
0 commit comments