mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
drm/xe: Waste fewer instructions in emit_wa_job()
I was debugging some unrelated issue and noticed the current code was
very verbose. We can improve it easily by using the more common batch
buffer building pattern.
Before:
bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
c4d: 41 8b 56 10 mov 0x10(%r14),%edx
c51: 49 8b 4e 08 mov 0x8(%r14),%rcx
c55: 8d 72 01 lea 0x1(%rdx),%esi
c58: 41 89 76 10 mov %esi,0x10(%r14)
c5c: c7 04 91 01 00 08 15 movl $0x15080001,(%rcx,%rdx,4)
bb->cs[bb->len++] = entry->reg.addr;
c63: 8b 08 mov (%rax),%ecx
c65: 41 8b 56 10 mov 0x10(%r14),%edx
c69: 49 8b 76 08 mov 0x8(%r14),%rsi
c6d: 81 e1 ff ff 3f 00 and $0x3fffff,%ecx
c73: 8d 7a 01 lea 0x1(%rdx),%edi
c76: 41 89 7e 10 mov %edi,0x10(%r14)
c7a: 89 0c 96 mov %ecx,(%rsi,%rdx,4)
..etc..
After:
*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
c52: 41 c7 04 24 01 00 08 movl $0x15080001,(%r12)
c59: 15
*cs++ = entry->reg.addr;
c5a: 8b 10 mov (%rax),%edx
..etc..
Resulting in the following binary change:
add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-348 (-348)
Function old new delta
xe_gt_record_default_lrcs.cold 304 296 -8
xe_gt_record_default_lrcs 2200 1860 -340
Total: Before=13554, After=13206, chg -2.57%
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-7-a5e2ca03f6bd@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
This commit is contained in:
committed by
Lucas De Marchi
parent
f4b538245f
commit
aded26ccaa
@@ -1888,7 +1888,7 @@ static const struct instr_state xe_hpg_svg_state[] = {
|
||||
{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
|
||||
};
|
||||
|
||||
void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
|
||||
u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs)
|
||||
{
|
||||
struct xe_gt *gt = q->hwe->gt;
|
||||
struct xe_device *xe = gt_to_xe(gt);
|
||||
@@ -1923,7 +1923,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
|
||||
if (!state_table) {
|
||||
xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
|
||||
GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
|
||||
return;
|
||||
return cs;
|
||||
}
|
||||
|
||||
for (int i = 0; i < state_table_size; i++) {
|
||||
@@ -1946,12 +1946,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
|
||||
instr == CMD_3DSTATE_DRAWING_RECTANGLE)
|
||||
instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
|
||||
|
||||
bb->cs[bb->len] = instr;
|
||||
*cs = instr;
|
||||
if (!is_single_dw)
|
||||
bb->cs[bb->len] |= (num_dw - 2);
|
||||
*cs |= (num_dw - 2);
|
||||
|
||||
bb->len += num_dw;
|
||||
cs += num_dw;
|
||||
}
|
||||
|
||||
return cs;
|
||||
}
|
||||
|
||||
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
|
||||
|
||||
Reference in New Issue
Block a user