I am wondering if some extra flushes are needed in regard to what the G45 PRM PDFs say about the BLT (section 8.6, vol 1b p. 170). Current git plus this patch gives only a moderate amount of corrupt rendering: diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index e239c21..f150e5b 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -63,7 +63,7 @@ #define NO_FILL_BOXES 0 #define NO_VIDEO 0 -#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ +#define MAX_FLUSH_VERTICES 12 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ #define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) @@ -571,26 +571,28 @@ static void gen4_emit_vertex_buffer(struct sna *sna, inline static void gen4_emit_pipe_flush(struct sna *sna) { -#if 1 +#if 0 OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(GEN4_PIPE_CONTROL_WC_FLUSH); + OUT_BATCH(GEN4_PIPE_CONTROL_WC_FLUSH | GEN4_PIPE_CONTROL_TC_FLUSH); OUT_BATCH(0); OUT_BATCH(0); #else OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + /* OUT_BATCH(MI_NOOP); */ #endif } inline static void gen4_emit_pipe_break(struct sna *sna) { -#if 1 +#if 0 OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(0); + OUT_BATCH(GEN4_PIPE_CONTROL_TC_FLUSH); OUT_BATCH(0); OUT_BATCH(0); #else OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + /* OUT_BATCH(MI_NOOP); */ #endif } @@ -599,11 +601,12 @@ gen4_emit_pipe_invalidate(struct sna *sna) { #if 0 OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(GEN4_PIPE_CONTROL_WC_FLUSH | GEN4_PIPE_CONTROL_TC_FLUSH); + OUT_BATCH(GEN4_PIPE_CONTROL_WC_FLUSH | GEN4_PIPE_CONTROL_TC_FLUSH | GEN4_PIPE_CONTROL_IS_FLUSH); OUT_BATCH(0); OUT_BATCH(0); #else - OUT_BATCH(MI_FLUSH); + OUT_BATCH(MI_FLUSH); /* | MI_STATE_INSTRUCTION_CACHE_FLUSH */ + /* OUT_BATCH(MI_NOOP); */ #endif } @@ -781,7 +784,10 @@ gen4_emit_urb(struct sna *sna) urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE; urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; urb_cs_end = urb_sf_end + 
URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; - assert(urb_cs_end <= 256); + if (sna->kgem.gen >= 045) + assert(urb_cs_end <= 384); + else + assert(urb_cs_end <= 256); while ((sna->kgem.nbatch & 15) > 12) OUT_BATCH(MI_NOOP); @@ -1623,6 +1629,7 @@ gen4_render_composite_done(struct sna *sna, kgem_bo_destroy(&sna->kgem, op->src.bo); sna_render_composite_redirect_done(sna, op); + gen4_emit_pipe_invalidate(sna); } static bool @@ -2154,6 +2161,7 @@ gen4_render_composite_spans_done(struct sna *sna, kgem_bo_destroy(&sna->kgem, op->base.src.bo); sna_render_composite_redirect_done(sna, &op->base); + gen4_emit_pipe_invalidate(sna); } static bool @@ -2500,6 +2508,7 @@ fallback_blt: gen4_vertex_flush(sna); sna_render_composite_redirect_done(sna, &tmp); kgem_bo_destroy(&sna->kgem, tmp.src.bo); + gen4_emit_pipe_invalidate(sna); return true; fallback_tiled_dst: @@ -2535,6 +2544,7 @@ gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op) { if (sna->render.vertex_offset) gen4_vertex_flush(sna); + gen4_emit_pipe_invalidate(sna); } static bool @@ -2736,6 +2746,7 @@ gen4_render_fill_boxes(struct sna *sna, gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); + gen4_emit_pipe_invalidate(sna); return true; } @@ -2776,6 +2787,7 @@ gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) if (sna->render.vertex_offset) gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); + gen4_emit_pipe_invalidate(sna); } static bool I've also tried setting "Render Cache Operational Flush Enable" of the Cache_Mode_0 register with intel_reg_write; this made no difference. I was also wondering if Firefox is particularly bad because it uses its own old version of cairo, which seems to be version 1.9.8 plus lots of patches.