|
| 1 | +From 502a42545f015f547546af52475e279602b7db26 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Konrad Dybcio < [email protected]> |
| 3 | +Date: Fri, 16 Feb 2024 20:50:59 +0100 |
| 4 | +Subject: [PATCH] freedreno: Add initial A702 support |
| 5 | + |
| 6 | +Can we forget this SKU exists? |
| 7 | + |
| 8 | +Turns out, not really.. But at least we can get GPU accel on watches! |
| 9 | + |
| 10 | +Signed-off-by: Konrad Dybcio < [email protected]> |
| 11 | +Upstream-Status: Submitted [https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27665] |
| 12 | +--- |
| 13 | + src/freedreno/common/freedreno_dev_info.h | 3 ++ |
| 14 | + src/freedreno/common/freedreno_devices.py | 52 +++++++++++++++++++ |
| 15 | + src/freedreno/registers/adreno/a6xx.xml | 2 +- |
| 16 | + src/freedreno/vulkan/tu_cmd_buffer.cc | 5 +- |
| 17 | + src/freedreno/vulkan/tu_device.cc | 12 ++--- |
| 18 | + src/freedreno/vulkan/tu_image.cc | 2 +- |
| 19 | + src/freedreno/vulkan/tu_pipeline.cc | 8 +-- |
| 20 | + .../drivers/freedreno/a6xx/fd6_emit.cc | 2 +- |
| 21 | + .../drivers/freedreno/a6xx/fd6_gmem.cc | 3 +- |
| 22 | + .../drivers/freedreno/freedreno_resource.c | 2 +- |
| 23 | + 10 files changed, 75 insertions(+), 16 deletions(-) |
| 24 | + |
| 25 | +diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h |
| 26 | +index c8b7884ee8ae..03d3368f018a 100644 |
| 27 | +--- a/src/freedreno/common/freedreno_dev_info.h |
| 28 | ++++ b/src/freedreno/common/freedreno_dev_info.h |
| 29 | +@@ -207,6 +207,9 @@ struct fd_dev_info { |
| 30 | + /* Whether the sad instruction (iadd3) is supported. */ |
| 31 | + bool has_sad; |
| 32 | + |
| 33 | ++ /* A702 is a heavily cut-down A6xx part; set this to gate the features and registers it lacks. */ |
| 34 | ++ bool is_a702; |
| 35 | ++ |
| 36 | + struct { |
| 37 | + uint32_t PC_POWER_CNTL; |
| 38 | + uint32_t TPL1_DBG_ECO_CNTL; |
| 39 | +diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py |
| 40 | +index 0e7e74c6a141..c5d28cf34bcc 100644 |
| 41 | +--- a/src/freedreno/common/freedreno_devices.py |
| 42 | ++++ b/src/freedreno/common/freedreno_devices.py |
| 43 | +@@ -819,6 +819,58 @@ add_gpus([ |
| 44 | + ], |
| 45 | + )) |
| 46 | + |
| 47 | ++add_gpus([ |
| 48 | ++ GPUId(702), # KGSL |
| 49 | ++ GPUId(chip_id=0x00b207002000, name="FD702"), # QRB2210 RB1 |
| 50 | ++ GPUId(chip_id=0xffff07002000, name="FD702"), # Default no-speedbin fallback |
| 51 | ++ ], A6xxGPUInfo( |
| 52 | ++ CHIP.A6XX, # Intentional: despite its 7xx name, A702 is handled as an A6xx-generation GPU |
| 53 | ++ [a6xx_base, A6XXProps( |
| 54 | ++ reg_size_vec4 = 48, |
| 55 | ++ instr_cache_size = 64, |
| 56 | ++ indirect_draw_wfm_quirk = True, |
| 57 | ++ has_cp_reg_write = False, |
| 58 | ++ depth_bounds_require_depth_test_quirk = True, |
| 59 | ++ has_gmem_fast_clear = True, |
| 60 | ++ has_hw_multiview = False, |
| 61 | ++ has_sampler_minmax = False, |
| 62 | ++ has_lpac = False, |
| 63 | ++ has_fs_tex_prefetch = False, |
| 64 | ++ sysmem_per_ccu_depth_cache_size = 128 * 1024, # TODO: value unverified for A702 |
| 65 | ++ sysmem_per_ccu_color_cache_size = 128 * 1024, # TODO: value unverified for A702 |
| 66 | ++ gmem_ccu_color_cache_fraction = CCUColorCacheFraction.FULL.value, |
| 67 | ++ vs_max_inputs_count = 16, |
| 68 | ++ prim_alloc_threshold = 0x1, |
| 69 | ++ storage_16bit = True, |
| 70 | ++ is_a702 = True, |
| 71 | ++ ) |
| 72 | ++ ], |
| 73 | ++ num_ccu = 1, |
| 74 | ++ tile_align_w = 32, |
| 75 | ++ tile_align_h = 16, |
| 76 | ++ num_vsc_pipes = 16, |
| 77 | ++ cs_shared_mem_size = 16 * 1024, |
| 78 | ++ wave_granularity = 1, |
| 79 | ++ fibers_per_sp = 128 * 16, |
| 80 | ++ threadsize_base = 16, |
| 81 | ++ max_waves = 32, |
| 82 | ++ magic_regs = dict( |
| 83 | ++ PC_POWER_CNTL = 0, |
| 84 | ++ TPL1_DBG_ECO_CNTL = 0x8000, |
| 85 | ++ GRAS_DBG_ECO_CNTL = 0, |
| 86 | ++ SP_CHICKEN_BITS = 0x1400, |
| 87 | ++ UCHE_CLIENT_PF = 0x84, |
| 88 | ++ PC_MODE_CNTL = 0xf, |
| 89 | ++ SP_DBG_ECO_CNTL = 0x0, |
| 90 | ++ RB_DBG_ECO_CNTL = 0x100000, |
| 91 | ++ RB_DBG_ECO_CNTL_blit = 0x100000, |
| 92 | ++ HLSQ_DBG_ECO_CNTL = 0, |
| 93 | ++ RB_UNKNOWN_8E01 = 0x1, |
| 94 | ++ VPC_DBG_ECO_CNTL = 0x0, |
| 95 | ++ UCHE_UNKNOWN_0E12 = 0x1, |
| 96 | ++ ), |
| 97 | ++ )) |
| 98 | ++ |
| 99 | + # Based on a6xx_base + a6xx_gen4 |
| 100 | + a7xx_base = A6XXProps( |
| 101 | + has_gmem_fast_clear = True, |
| 102 | +diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml |
| 103 | +index d49919f6344c..dbf94dbaefab 100644 |
| 104 | +--- a/src/freedreno/registers/adreno/a6xx.xml |
| 105 | ++++ b/src/freedreno/registers/adreno/a6xx.xml |
| 106 | +@@ -4353,7 +4353,7 @@ to upconvert to 32b float internally? |
| 107 | + <reg32 offset="0x9306" name="VPC_SO_DISABLE" usage="rp_blit"> |
| 108 | + <bitfield name="DISABLE" pos="0" type="boolean"/> |
| 109 | + </reg32> |
| 110 | +- <reg32 offset="0x9307" name="VPC_POLYGON_MODE2" variants="A7XX-" usage="rp_blit"> |
| 111 | ++ <reg32 offset="0x9307" name="VPC_POLYGON_MODE2" variants="A6XX-" usage="rp_blit"> <!-- A702 + A7xx --> |
| 112 | + <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> |
| 113 | + </reg32> |
| 114 | + <reg32 offset="0x9308" name="VPC_ATTR_BUF_SIZE_GMEM" variants="A7XX-" usage="rp_blit"> |
| 115 | +diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc |
| 116 | +index 3e3a9f5f1e95..92965586c16e 100644 |
| 117 | +--- a/src/freedreno/vulkan/tu_cmd_buffer.cc |
| 118 | ++++ b/src/freedreno/vulkan/tu_cmd_buffer.cc |
| 119 | +@@ -1094,7 +1094,8 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, |
| 120 | + |
| 121 | + tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5_OFFSET, 4); |
| 122 | + tu_cs_emit(cs, tiling->pipe_sizes[pipe] | |
| 123 | +- CP_SET_BIN_DATA5_0_VSC_N(slot)); |
| 124 | ++ CP_SET_BIN_DATA5_0_VSC_N(slot) |
| 125 | ++ /* A702 also sets BIT(0) but that hangchecks */); |
| 126 | + tu_cs_emit(cs, pipe * cmd->vsc_draw_strm_pitch); |
| 127 | + tu_cs_emit(cs, pipe * 4); |
| 128 | + tu_cs_emit(cs, pipe * cmd->vsc_prim_strm_pitch); |
| 129 | +@@ -1373,7 +1374,7 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs) |
| 130 | + tu_cs_emit_write_reg(cs, REG_A6XX_SP_DBG_ECO_CNTL, |
| 131 | + phys_dev->info->a6xx.magic.SP_DBG_ECO_CNTL); |
| 132 | + tu_cs_emit_write_reg(cs, REG_A6XX_SP_PERFCTR_ENABLE, 0x3f); |
| 133 | +- if (CHIP == A6XX) |
| 134 | ++ if (CHIP == A6XX && !cs->device->physical_device->info->a6xx.is_a702) |
| 135 | + tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_UNKNOWN_B605, 0x44); |
| 136 | + tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_DBG_ECO_CNTL, |
| 137 | + phys_dev->info->a6xx.magic.TPL1_DBG_ECO_CNTL); |
| 138 | +diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc |
| 139 | +index 7b18dcf24f9f..c5874808535b 100644 |
| 140 | +--- a/src/freedreno/vulkan/tu_device.cc |
| 141 | ++++ b/src/freedreno/vulkan/tu_device.cc |
| 142 | +@@ -318,7 +318,7 @@ get_device_extensions(const struct tu_physical_device *device, |
| 143 | + #endif |
| 144 | + .EXT_texel_buffer_alignment = true, |
| 145 | + .EXT_tooling_info = true, |
| 146 | +- .EXT_transform_feedback = true, |
| 147 | ++ .EXT_transform_feedback = !device->info->a6xx.is_a702, |
| 148 | + .EXT_vertex_attribute_divisor = true, |
| 149 | + .EXT_vertex_input_dynamic_state = true, |
| 150 | + |
| 151 | +@@ -352,15 +352,15 @@ tu_get_features(struct tu_physical_device *pdevice, |
| 152 | + features->fullDrawIndexUint32 = true; |
| 153 | + features->imageCubeArray = true; |
| 154 | + features->independentBlend = true; |
| 155 | +- features->geometryShader = true; |
| 156 | +- features->tessellationShader = true; |
| 157 | ++ features->geometryShader = !pdevice->info->a6xx.is_a702; |
| 158 | ++ features->tessellationShader = !pdevice->info->a6xx.is_a702; |
| 159 | + features->sampleRateShading = true; |
| 160 | + features->dualSrcBlend = true; |
| 161 | + features->logicOp = true; |
| 162 | + features->multiDrawIndirect = true; |
| 163 | + features->drawIndirectFirstInstance = true; |
| 164 | + features->depthClamp = true; |
| 165 | +- features->depthBiasClamp = true; |
| 166 | ++ features->depthBiasClamp = !pdevice->info->a6xx.is_a702; |
| 167 | + features->fillModeNonSolid = true; |
| 168 | + features->depthBounds = true; |
| 169 | + features->wideLines = pdevice->info->a6xx.line_width_max > 1.0; |
| 170 | +@@ -502,7 +502,7 @@ tu_get_features(struct tu_physical_device *pdevice, |
| 171 | + features->indexTypeUint8 = true; |
| 172 | + |
| 173 | + /* VK_KHR_line_rasterization */ |
| 174 | +- features->rectangularLines = true; |
| 175 | ++ features->rectangularLines = !pdevice->info->a6xx.is_a702; |
| 176 | + features->bresenhamLines = true; |
| 177 | + features->smoothLines = false; |
| 178 | + features->stippledRectangularLines = false; |
| 179 | +@@ -1039,7 +1039,7 @@ tu_get_properties(struct tu_physical_device *pdevice, |
| 180 | + props->subPixelInterpolationOffsetBits = 4; |
| 181 | + props->maxFramebufferWidth = (1 << 14); |
| 182 | + props->maxFramebufferHeight = (1 << 14); |
| 183 | +- props->maxFramebufferLayers = (1 << 10); |
| 184 | ++ props->maxFramebufferLayers = (1 << (pdevice->info->a6xx.is_a702 ? 8 : 10)); |
| 185 | + props->framebufferColorSampleCounts = sample_counts; |
| 186 | + props->framebufferDepthSampleCounts = sample_counts; |
| 187 | + props->framebufferStencilSampleCounts = sample_counts; |
| 188 | +diff --git a/src/freedreno/vulkan/tu_image.cc b/src/freedreno/vulkan/tu_image.cc |
| 189 | +index da5e1e520a4c..10ed35a25c9e 100644 |
| 190 | +--- a/src/freedreno/vulkan/tu_image.cc |
| 191 | ++++ b/src/freedreno/vulkan/tu_image.cc |
| 192 | +@@ -739,7 +739,7 @@ tu_image_init(struct tu_device *device, struct tu_image *image, |
| 193 | + } |
| 194 | + } |
| 195 | + |
| 196 | +- if (TU_DEBUG(NOUBWC)) { |
| 197 | ++ if (TU_DEBUG(NOUBWC) || device->physical_device->info->a6xx.is_a702) { |
| 198 | + image->ubwc_enabled = false; |
| 199 | + } |
| 200 | + |
| 201 | +diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc |
| 202 | +index a9dc691bfa19..308830797656 100644 |
| 203 | +--- a/src/freedreno/vulkan/tu_pipeline.cc |
| 204 | ++++ b/src/freedreno/vulkan/tu_pipeline.cc |
| 205 | +@@ -3106,7 +3106,9 @@ tu6_rast_size(struct tu_device *dev, |
| 206 | + bool multiview, |
| 207 | + bool per_view_viewport) |
| 208 | + { |
| 209 | +- if (CHIP == A6XX) { |
| 210 | ++ if (CHIP == A6XX && dev->physical_device->info->a6xx.is_a702) { |
| 211 | ++ return 17; |
| 212 | ++ } else if (CHIP == A6XX) { |
| 213 | + return 15 + (dev->physical_device->info->a6xx.has_legacy_pipeline_shading_rate ? 8 : 0); |
| 214 | + } else { |
| 215 | + return 25; |
| 216 | +@@ -3155,9 +3157,9 @@ tu6_emit_rast(struct tu_cs *cs, |
| 217 | + tu_cs_emit_regs(cs, |
| 218 | + PC_POLYGON_MODE(CHIP, polygon_mode)); |
| 219 | + |
| 220 | +- if (CHIP == A7XX) { |
| 221 | ++ if (CHIP == A7XX || cs->device->physical_device->info->a6xx.is_a702) { |
| 222 | + tu_cs_emit_regs(cs, |
| 223 | +- A7XX_VPC_POLYGON_MODE2(polygon_mode)); |
| 224 | ++ A6XX_VPC_POLYGON_MODE2(polygon_mode)); |
| 225 | + } |
| 226 | + |
| 227 | + tu_cs_emit_regs(cs, PC_RASTER_CNTL(CHIP, |
| 228 | +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc |
| 229 | +index 89ed01437d40..2a36c3fcdf9d 100644 |
| 230 | +--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc |
| 231 | ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc |
| 232 | +@@ -900,7 +900,7 @@ fd6_emit_static_regs(struct fd_context *ctx, struct fd_ringbuffer *ring) |
| 233 | + WRITE(REG_A6XX_SP_FLOAT_CNTL, A6XX_SP_FLOAT_CNTL_F16_NO_INF); |
| 234 | + WRITE(REG_A6XX_SP_DBG_ECO_CNTL, screen->info->a6xx.magic.SP_DBG_ECO_CNTL); |
| 235 | + WRITE(REG_A6XX_SP_PERFCTR_ENABLE, 0x3f); |
| 236 | +- if (CHIP == A6XX) |
| 237 | ++ if (CHIP == A6XX && !screen->info->a6xx.is_a702) |
| 238 | + WRITE(REG_A6XX_TPL1_UNKNOWN_B605, 0x44); |
| 239 | + WRITE(REG_A6XX_TPL1_DBG_ECO_CNTL, screen->info->a6xx.magic.TPL1_DBG_ECO_CNTL); |
| 240 | + if (CHIP == A6XX) { |
| 241 | +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc |
| 242 | +index 309ac5006b91..d346caf6e328 100644 |
| 243 | +--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc |
| 244 | ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc |
| 245 | +@@ -1278,7 +1278,8 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) |
| 246 | + |
| 247 | + OUT_PKT7(ring, CP_SET_BIN_DATA5, 7); |
| 248 | + OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) | |
| 249 | +- CP_SET_BIN_DATA5_0_VSC_N(tile->n)); |
| 250 | ++ CP_SET_BIN_DATA5_0_VSC_N(tile->n) |
| 251 | ++ /* A702 also sets BIT(0) but that hangchecks */); |
| 252 | + OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */ |
| 253 | + (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0); |
| 254 | + OUT_RELOC( |
| 255 | +diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c |
| 256 | +index 4a1dc734d3c5..f91964ad97e8 100644 |
| 257 | +--- a/src/gallium/drivers/freedreno/freedreno_resource.c |
| 258 | ++++ b/src/gallium/drivers/freedreno/freedreno_resource.c |
| 259 | +@@ -1302,7 +1302,7 @@ get_best_layout(struct fd_screen *screen, |
| 260 | + return LINEAR; |
| 261 | + } |
| 262 | + |
| 263 | +- bool ubwc_ok = is_a6xx(screen); |
| 264 | ++ bool ubwc_ok = is_a6xx(screen) && !screen->info->a6xx.is_a702; |
| 265 | + if (FD_DBG(NOUBWC)) |
| 266 | + ubwc_ok = false; |
| 267 | + |
0 commit comments