diff -u mesa-23.0.1/debian/changelog mesa-23.0.1/debian/changelog --- mesa-23.0.1/debian/changelog +++ mesa-23.0.1/debian/changelog @@ -1,3 +1,9 @@ +mesa (23.0.1-1ubuntu2) lunar; urgency=medium + + * freedreno: a690 support. + + -- Juerg Haefliger Mon, 03 Apr 2023 19:24:38 +0200 + mesa (23.0.1-1ubuntu1) lunar; urgency=medium * Merge from Debian. (LP: #2009481) diff -u mesa-23.0.1/debian/patches/series mesa-23.0.1/debian/patches/series --- mesa-23.0.1/debian/patches/series +++ mesa-23.0.1/debian/patches/series @@ -3,0 +4,6 @@ + +# freedreno: a690 support +# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21573 +0001-freedreno-Specify-GMEM-tile-alignment-per-GPU.patch +0002-freedreno-tu-Big-GMEM-support.patch +0003-freedreno-tu-Add-a690-support.patch only in patch2: unchanged: --- mesa-23.0.1.orig/debian/patches/0001-freedreno-Specify-GMEM-tile-alignment-per-GPU.patch +++ mesa-23.0.1/debian/patches/0001-freedreno-Specify-GMEM-tile-alignment-per-GPU.patch @@ -0,0 +1,149 @@ +From d632a43c69d4b93cf67380b5f2d5674b0947a145 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Sun, 22 Jan 2023 10:57:30 -0800 +Subject: [PATCH 1/3] freedreno: Specify GMEM tile alignment per GPU + +They differ presumably based on # of CCU/SP and DDR bus topology. + +Signed-off-by: Rob Clark +Part-of: +Signed-off-by: Juerg Haefliger +--- + src/freedreno/common/freedreno_devices.py | 38 +++++++++++-------- + .../drivers/freedreno/freedreno_gmem.c | 2 +- + 2 files changed, 23 insertions(+), 17 deletions(-) + +diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py +index e9fd3b5c9c31..4dd66c6a19c0 100644 +--- a/src/freedreno/common/freedreno_devices.py ++++ b/src/freedreno/common/freedreno_devices.py +@@ -108,19 +108,17 @@ class A6xxGPUInfo(GPUInfo): + into distinct sub-generations. The template parameter avoids + duplication of parameters that are unique to the sub-generation. + """ +- def __init__(self, template, num_sp_cores, num_ccu, magic_regs): ++ def __init__(self, template, num_ccu, tile_align_w, tile_align_h, magic_regs): + super().__init__(gmem_align_w = 16, gmem_align_h = 4, +- tile_align_w = 32, tile_align_h = 32, ++ tile_align_w = tile_align_w, ++ tile_align_h = tile_align_h, + tile_max_w = 1024, # max_bitfield_val(5, 0, 5) + tile_max_h = max_bitfield_val(14, 8, 4), + num_vsc_pipes = 32) +- assert(num_sp_cores == num_ccu) + +- self.num_sp_cores = num_sp_cores +- +- # 96 tile alignment seems correlated to 3 CCU +- if num_ccu == 3: +- self.tile_align_w = 96 ++ # The # of SP cores seems to always match # of CCU ++ self.num_sp_cores = num_ccu ++ self.num_ccu = num_ccu + + self.a6xx = Struct() + self.a6xx.magic = Struct() +@@ -273,8 +271,9 @@ add_gpus([ + GPUId(619), + ], A6xxGPUInfo( + a6xx_gen1, +- num_sp_cores = 1, + num_ccu = 1, ++ tile_align_w = 32, ++ tile_align_h = 16, + magic_regs = dict( + PC_POWER_CNTL = 0, + TPL1_DBG_ECO_CNTL = 0x00108000, +@@ -296,8 +295,9 @@ add_gpus([ + GPUId(620), + ], A6xxGPUInfo( + a6xx_gen1, +- num_sp_cores = 1, + num_ccu = 1, ++ tile_align_w = 32, ++ tile_align_h = 16, + magic_regs = dict( + PC_POWER_CNTL = 0, + TPL1_DBG_ECO_CNTL = 0x01008000, +@@ -319,8 +319,9 @@ add_gpus([ + GPUId(630), + ], A6xxGPUInfo( + a6xx_gen1, +- num_sp_cores = 2, + num_ccu = 2, ++ tile_align_w = 32, ++ tile_align_h = 16, + magic_regs = dict( + PC_POWER_CNTL = 1, + TPL1_DBG_ECO_CNTL = 0x00108000, +@@ -342,8 +343,9 @@ add_gpus([ + GPUId(640), + ], A6xxGPUInfo( + a6xx_gen2, +- num_sp_cores = 2, + num_ccu = 2, ++ tile_align_w = 32, ++ tile_align_h = 16, + magic_regs = dict( + PC_POWER_CNTL = 1, + TPL1_DBG_ECO_CNTL = 0x00008000, +@@ -365,8 +367,9 @@ add_gpus([ + GPUId(680), + ], A6xxGPUInfo( + a6xx_gen2, +- num_sp_cores = 4, + num_ccu = 4, ++ tile_align_w = 64, ++ tile_align_h = 32, + magic_regs = dict( + PC_POWER_CNTL = 3, + TPL1_DBG_ECO_CNTL = 0x00108000, +@@ -388,8 +391,9 @@ add_gpus([ + GPUId(650), + ], A6xxGPUInfo( + a6xx_gen3, +- num_sp_cores = 3, + num_ccu = 3, ++ tile_align_w = 96, ++ tile_align_h = 48, + magic_regs = dict( + PC_POWER_CNTL = 2, + # this seems to be a chicken bit that fixes cubic filtering: +@@ -416,8 +420,9 @@ add_gpus([ + GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"), + ], A6xxGPUInfo( + a6xx_gen4, +- num_sp_cores = 2, + num_ccu = 2, ++ tile_align_w = 32, ++ tile_align_h = 16, + magic_regs = dict( + PC_POWER_CNTL = 1, + TPL1_DBG_ECO_CNTL = 0x05008000, +@@ -439,8 +444,9 @@ add_gpus([ + GPUId(660), + ], A6xxGPUInfo( + a6xx_gen4, +- num_sp_cores = 3, + num_ccu = 3, ++ tile_align_w = 96, ++ tile_align_h = 16, + magic_regs = dict( + PC_POWER_CNTL = 2, + TPL1_DBG_ECO_CNTL = 0x05008000, +diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c +index f81f34ca37a2..9c0d780866a2 100644 +--- a/src/gallium/drivers/freedreno/freedreno_gmem.c ++++ b/src/gallium/drivers/freedreno/freedreno_gmem.c +@@ -530,7 +530,7 @@ gmem_key_init(struct fd_batch *batch, bool assume_zs, bool no_scis_opt) + */ + key->gmem_page_align = 8; + } else if (is_a6xx(screen)) { +- key->gmem_page_align = (screen->info->tile_align_w == 96) ? 3 : 1; ++ key->gmem_page_align = screen->info->num_ccu; + } else { + // TODO re-check this across gens.. maybe it should only + // be a single page in some cases: +-- +2.37.2 + only in patch2: unchanged: --- mesa-23.0.1.orig/debian/patches/0002-freedreno-tu-Big-GMEM-support.patch +++ mesa-23.0.1/debian/patches/0002-freedreno-tu-Big-GMEM-support.patch @@ -0,0 +1,204 @@ +From 9ec1ca6c5f646a6ae77759b375963ebd7558aba3 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Fri, 24 Feb 2023 14:41:14 -0800 +Subject: [PATCH 2/3] freedreno+tu: Big GMEM support + +Signed-off-by: Rob Clark +Part-of: + +[juergh: Drop modifications of .gitlab-ci files] +Signed-off-by: Juerg Haefliger +--- + src/freedreno/registers/adreno/a6xx.xml | 12 ++++++---- + src/freedreno/vulkan/tu_cmd_buffer.c | 24 +++++++++++++------ + .../drivers/freedreno/a6xx/fd6_blitter.c | 3 +-- + src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 2 +- + src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 15 ++++++++++++ + src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 1 + + src/gallium/drivers/freedreno/a6xx/fd6_gmem.c | 12 +++------- + 7 files changed, 45 insertions(+), 24 deletions(-) + +diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml +index 591e91e829c5..a960d5d25741 100644 +--- a/src/freedreno/registers/adreno/a6xx.xml ++++ b/src/freedreno/registers/adreno/a6xx.xml +@@ -2343,6 +2343,13 @@ to upconvert to 32b float internally? + + + ++ ++ ++ ++ ++ ++ ++ + + +- +- +- +- +- + + + +diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c +index 426e04d96c11..77842ab7b112 100644 +--- a/src/freedreno/vulkan/tu_cmd_buffer.c ++++ b/src/freedreno/vulkan/tu_cmd_buffer.c +@@ -199,6 +199,18 @@ tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer) + &cmd_buffer->state.renderpass_cache); + } + ++static struct fd_reg_pair ++rb_ccu_cntl(uint32_t color_offset, bool gmem) ++{ ++ uint32_t color_offset_hi = color_offset >> 21; ++ color_offset &= 0x1fffff; ++ return A6XX_RB_CCU_CNTL( ++ .color_offset = color_offset, ++ .color_offset_hi = color_offset_hi, ++ .gmem = gmem, ++ ); ++} ++ + /* Cache flushes for things that use the color/depth read/write path (i.e. + * blits and draws). This deals with changing CCU state as well as the usual + * cache flushing. +@@ -242,11 +254,10 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer, + if (ccu_state != cmd_buffer->state.ccu_state) { + struct tu_physical_device *phys_dev = cmd_buffer->device->physical_device; + tu_cs_emit_regs(cs, +- A6XX_RB_CCU_CNTL(.color_offset = +- ccu_state == TU_CMD_CCU_GMEM ? +- phys_dev->ccu_offset_gmem : +- phys_dev->ccu_offset_bypass, +- .gmem = ccu_state == TU_CMD_CCU_GMEM)); ++ rb_ccu_cntl(ccu_state == TU_CMD_CCU_GMEM ? ++ phys_dev->ccu_offset_gmem : ++ phys_dev->ccu_offset_bypass, ++ ccu_state == TU_CMD_CCU_GMEM)); + cmd_buffer->state.ccu_state = ccu_state; + } + } +@@ -932,8 +943,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) + cmd->state.cache.pending_flush_bits &= + ~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE); + +- tu_cs_emit_regs(cs, +- A6XX_RB_CCU_CNTL(.color_offset = phys_dev->ccu_offset_bypass)); ++ tu_cs_emit_regs(cs, rb_ccu_cntl(phys_dev->ccu_offset_bypass, false)); + cmd->state.ccu_state = TU_CMD_CCU_SYSMEM; + tu_cs_emit_write_reg(cs, REG_A6XX_RB_DBG_ECO_CNTL, + phys_dev->info->a6xx.magic.RB_DBG_ECO_CNTL); +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c +index 6976e6c707f2..4e95e1deaeab 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c +@@ -258,8 +258,7 @@ emit_setup(struct fd_batch *batch) + + /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */ + OUT_WFI5(ring); +- OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); +- OUT_RING(ring, A6XX_RB_CCU_CNTL_COLOR_OFFSET(screen->ccu_offset_bypass)); ++ fd6_emit_ccu_cntl(ring, screen, false); + } + + static void +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +index 525dcd814688..1e95a226793a 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +@@ -377,7 +377,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a + + OUT_WFI5(ring); + +- OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass)); ++ fd6_emit_ccu_cntl(ring, screen, false); + + OUT_REG(ring, + A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +index 98577637fc81..fc769e3ecfa4 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +@@ -959,6 +959,21 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + } + } + ++void ++fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem) ++{ ++ uint32_t offset = gmem ? screen->ccu_offset_gmem : screen->ccu_offset_bypass; ++ uint32_t offset_hi = offset >> 21; ++ offset &= 0x1fffff; ++ ++ OUT_REG(ring, A6XX_RB_CCU_CNTL( ++ .concurrent_resolve = gmem && screen->info->a6xx.concurrent_resolve, ++ .color_offset_hi = offset_hi, ++ .gmem = gmem, ++ .color_offset = offset, ++ )); ++} ++ + /* emit setup at begin of new cmdstream buffer (don't rely on previous + * state, there could have been a context switch between ioctls): + */ +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +index f60d1b82c21c..fdfef2cd90a5 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +@@ -321,6 +321,7 @@ void fd6_emit_3d_state(struct fd_ringbuffer *ring, + void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct ir3_shader_variant *cp) assert_dt; + ++void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem); + void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); + + void fd6_emit_init_screen(struct pipe_screen *pscreen); +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +index 5542ff82f97c..faf23953f7f9 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +@@ -765,10 +765,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt + + OUT_WFI5(ring); + +- OUT_REG(ring, +- A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem, +- .gmem = true, +- .concurrent_resolve = screen->info->a6xx.concurrent_resolve)); ++ fd6_emit_ccu_cntl(ring, screen, true); + } + + static void +@@ -833,10 +830,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt + OUT_RING(ring, 0x1); + + fd_wfi(batch, ring); +- OUT_REG(ring, +- A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem, +- .gmem = true, +- .concurrent_resolve = screen->info->a6xx.concurrent_resolve)); ++ fd6_emit_ccu_cntl(ring, screen, true); + + emit_zs(ring, pfb->zsbuf, batch->gmem_state); + emit_mrt(ring, pfb, batch->gmem_state); +@@ -1616,7 +1610,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt + fd6_cache_inv(batch, ring); + + fd_wfi(batch, ring); +- OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass)); ++ fd6_emit_ccu_cntl(ring, screen, false); + + /* enable stream-out, with sysmem there is only one pass: */ + OUT_REG(ring, A6XX_VPC_SO_DISABLE(false)); +-- +2.37.2 + only in patch2: unchanged: --- mesa-23.0.1.orig/debian/patches/0003-freedreno-tu-Add-a690-support.patch +++ mesa-23.0.1/debian/patches/0003-freedreno-tu-Add-a690-support.patch @@ -0,0 +1,50 @@ +From ed220b8da811b22b7a8e6ae6c690dfbef1c5274e Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Mon, 31 Oct 2022 11:53:10 -0700 +Subject: [PATCH 3/3] freedreno+tu: Add a690 support + +Signed-off-by: Rob Clark +Part-of: +Signed-off-by: Juerg Haefliger +--- + src/freedreno/common/freedreno_devices.py | 24 +++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py +index 4dd66c6a19c0..2b21b43c34f8 100644 +--- a/src/freedreno/common/freedreno_devices.py ++++ b/src/freedreno/common/freedreno_devices.py +@@ -464,6 +464,30 @@ add_gpus([ + ) + )) + ++add_gpus([ ++ GPUId(690), ++ ], A6xxGPUInfo( ++ a6xx_gen4, ++ num_ccu = 8, ++ tile_align_w = 64, ++ tile_align_h = 32, ++ magic_regs = dict( ++ PC_POWER_CNTL = 7, ++ TPL1_DBG_ECO_CNTL = 0x01008000, ++ GRAS_DBG_ECO_CNTL = 0x0, ++ SP_CHICKEN_BITS = 0x00001400, ++ UCHE_CLIENT_PF = 0x00000084, ++ PC_MODE_CNTL = 0x1f, ++ SP_DBG_ECO_CNTL = 0x00000000, ++ RB_DBG_ECO_CNTL = 0x00100000, ++ RB_DBG_ECO_CNTL_blit = 0x00100000, # ??? ++ HLSQ_DBG_ECO_CNTL = 0x0, ++ RB_UNKNOWN_8E01 = 0x0, ++ VPC_DBG_ECO_CNTL = 0x02000000, ++ UCHE_UNKNOWN_0E12 = 0x00000001 ++ ) ++ )) ++ + template = """\ + /* Copyright (C) 2021 Google, Inc. + * +-- +2.37.2 +