Merge tag 'drm-xe-next-2025-04-17' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Core Changes:
- Fix drm_gpusvm kernel-doc (Lucas)

Driver Changes:
- Release guc ids before cancelling work (Tejas)
- Remove a duplicated pc_start_call (Rodrigo)
- Fix an incorrect assert in previous userptr fixes (Thomas)
- Remove gen11 assertions and prefixes (Lucas)
- Drop sentinels from arg to xe_rtp_process_to_src (Lucas)
- Temporarily disable D3Cold on BMG (Rodrigo)
- Fix MOCS debugfs LNCF readout (Tvrtko)
- Some ring flush cleanups (Tvrtko)
- Use unsigned int for alignment in fb pinning code (Tvrtko)
- Retry and wait longer for GuC PC start (Rodrigo)
- Recognize 3DSTATE_COARSE_PIXEL in LRC dumps (Matt Roper)
- Remove redundant check in xe_vm_create_ioctl() (Xin)
- A bunch of SRIOV updates (Michal)
- Add stats for SVM page-faults (Francois)
- Fix an UAF (Harish)
- Expose fan speed (Raag)
- Fix exporting xe buffer objects multiple times (Tomasz)
- Apply a workaround (Vinay)
- Simplify pinned bo iteration (Thomas)
- Remove an incorrect "static" keyword (Lucas)
- Add support for separate firmware files on each GT (Lucas)
- Survivability handling fixes (Lucas)
- Allow to inject error in early probe (Lucas)
- Fix unmet direct dependencies warning (Yue Haibing)
- More error injection during probe (Francois)
- Coding style fix (Maarten)
- Additional stats support (Riana)
- Add fault injection for xe_oa_alloc_regs (Nakshrtra)
- Add a BMG PCI ID (Matt Roper)
- Some SVM fixes and preliminary SVM multi-device work (Thomas)
- Switch the migrate code from drm managed to dev managed (Aradhya)
- Fix an out-of-bounds shift when invalidating TLB (Thomas)
- Ensure fixed_slice_mode gets set after ccs_mode change (Niranjana)
- Use local fence in error path of xe_migrate_clear (Matthew Brost)
- More Workarounds (Julia)
- Define sysfs_ops on all directories (Tejas)
- Set power state to D3Cold during s2idle/s3 (Badal)
- Devcoredump output fix (John)
- Avoid plain 64-bit division (Arnd Bergmann)
- Reword a debug message (John)
- Don't print a hwconfig error message when forcing execlists (Stuart)
- Restore an error code to avoid a smatch warning (Rodrigo)
- Invalidate L3 read-only cachelines for geometry streams too (Kenneth)
- Make PPHWSP size explicit in xe_gt_lrc_size() (Gustavo)
- Add GT frequency events (Vinay)
- Fix xe_pt_stage_bind_walk kerneldoc (Thomas)
- Add a workaround (Aradhya)
- Rework pinned save/restore (Matthew Auld, Matthew Brost)
- Allow non-contig VRAM kernel BO (Matthew Auld)
- Support non-contig VRAM provisioning for SRIOV (Matthew Auld)
- Allow scratch-pages for unmapped parts of page-faulting VMs (Oak)
- Ensure XE_BO_FLAG_CPU_ADDR_MIRROR has a unique value (Matt Roper)
- Fix taking an invalid lock on wedge (Lucas)
- Configs and documentation for survivability mode (Riana)
- Remove an unused macro (Shuicheng)
- Work around a page-fault full error (Matt Brost)
- Enable a SRIOV workaround (John)
- Bump the recommended GuC version (John)
- Allow to drop VRAM resizing (Lucas)
- Don't expose privileged debugfs files if VF (Michal)
- Don't show GGTT/LMEM debugfs files under media GT (Michal)
- Adjust ring-buffer emission for maximum possible size (Tvrtko)
- Fix notifier vs folio lock deadlock (Matthew Auld)
- Stop relying on placement for dma-buf unmap (Matthew Auld)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://lore.kernel.org/r/aADWaEFKVmxSnDLo@fedora
commit d2b9e2f8a1
Dave Airlie committed on 2025-04-26 08:06:02 +10:00
84 changed files with 2147 additions and 836 deletions

View File

@ -124,3 +124,27 @@ Contact: intel-xe@lists.freedesktop.org
Description: RO. VRAM temperature in millidegree Celsius.
Only supported for particular Intel Xe graphics platforms.
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
Date: March 2025
KernelVersion: 6.14
Contact: intel-xe@lists.freedesktop.org
Description: RO. Fan 1 speed in RPM.
Only supported for particular Intel Xe graphics platforms.
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input
Date: March 2025
KernelVersion: 6.14
Contact: intel-xe@lists.freedesktop.org
Description: RO. Fan 2 speed in RPM.
Only supported for particular Intel Xe graphics platforms.
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
Date: March 2025
KernelVersion: 6.14
Contact: intel-xe@lists.freedesktop.org
Description: RO. Fan 3 speed in RPM.
Only supported for particular Intel Xe graphics platforms.
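For illustration only, a minimal userspace sketch reading the new fan attribute; the hwmon index and PCI address below are made-up examples (they differ per system and should be discovered at runtime):

/* Hypothetical example: read fan 1 speed (RPM) exposed by the xe hwmon interface. */
#include <stdio.h>

int main(void)
{
	/* "hwmon2" and "0000:03:00.0" are placeholder values for this sketch. */
	const char *path = "/sys/bus/pci/drivers/xe/0000:03:00.0/hwmon/hwmon2/fan1_input";
	unsigned int rpm;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%u", &rpm) != 1) {
		fclose(f);
		return 1;
	}
	printf("fan1: %u RPM\n", rpm);
	fclose(f);
	return 0;
}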

View File

@ -25,3 +25,4 @@ DG2, etc is provided to prototype the driver.
xe_debugging
xe_devcoredump
xe-drm-usage-stats.rst
xe_configfs

View File

@ -0,0 +1,10 @@
.. SPDX-License-Identifier: GPL-2.0+
.. _xe_configfs:
============
Xe Configfs
============
.. kernel-doc:: drivers/gpu/drm/xe/xe_configfs.c
:doc: Xe Configfs

View File

@ -12,3 +12,10 @@ Internal API
.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
:internal:
==================
Boot Survivability
==================
.. kernel-doc:: drivers/gpu/drm/xe/xe_survivability_mode.c
:doc: Xe Boot Survivability

View File

@ -39,7 +39,6 @@ config DRM_XE
select DRM_TTM_HELPER
select DRM_EXEC
select DRM_GPUVM
select DRM_GPUSVM if !UML && DEVICE_PRIVATE
select DRM_SCHED
select MMU_NOTIFIER
select WANT_DEV_COREDUMP
@ -74,9 +73,22 @@ config DRM_XE_DP_TUNNEL
If in doubt say "Y".
config DRM_XE_GPUSVM
bool "Enable CPU to GPU address mirroring"
depends on DRM_XE
depends on !UML
depends on DEVICE_PRIVATE
default y
select DRM_GPUSVM
help
Enable this option if you want support for CPU to GPU address
mirroring.
If in doubut say "Y".
config DRM_XE_DEVMEM_MIRROR
bool "Enable device memory mirror"
depends on DRM_XE
depends on DRM_XE_GPUSVM
select GET_FREE_REGION
default y
help

View File

@ -125,12 +125,13 @@ xe-y += xe_bb.o \
xe_wopcm.o
xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
xe-$(CONFIG_DRM_GPUSVM) += xe_svm.o
xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o
# graphics hardware monitoring (HWMON) support
xe-$(CONFIG_HWMON) += xe_hwmon.o
xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o
xe-$(CONFIG_CONFIGFS_FS) += xe_configfs.o
# graphics virtualization (SR-IOV) support
xe-y += \

View File

@ -141,6 +141,7 @@ enum xe_guc_action {
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D,
XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,

View File

@ -367,6 +367,7 @@ enum xe_guc_klv_ids {
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009,
GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a,
GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b,
};
#endif

View File

@ -45,7 +45,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
XE_BO_FLAG_STOLEN |
XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED);
XE_BO_FLAG_GGTT);
if (!IS_ERR(obj))
drm_info(&xe->drm, "Allocated fbdev into stolen\n");
else
@ -56,7 +56,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_GGTT | XE_BO_FLAG_PINNED);
XE_BO_FLAG_GGTT);
}
if (IS_ERR(obj)) {

View File

@ -83,7 +83,7 @@ initial_plane_bo(struct xe_device *xe,
if (plane_config->size == 0)
return NULL;
flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT;
flags = XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT;
base = round_down(plane_config->base, page_size);
if (IS_DGFX(xe)) {

View File

@ -0,0 +1,79 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2025 Intel Corporation
*/
#ifndef _XE_ALU_COMMANDS_H_
#define _XE_ALU_COMMANDS_H_
#include "instructions/xe_instr_defs.h"
/* Instruction Opcodes */
#define CS_ALU_OPCODE_NOOP 0x000
#define CS_ALU_OPCODE_FENCE_RD 0x001
#define CS_ALU_OPCODE_FENCE_WR 0x002
#define CS_ALU_OPCODE_LOAD 0x080
#define CS_ALU_OPCODE_LOADINV 0x480
#define CS_ALU_OPCODE_LOAD0 0x081
#define CS_ALU_OPCODE_LOAD1 0x481
#define CS_ALU_OPCODE_LOADIND 0x082
#define CS_ALU_OPCODE_ADD 0x100
#define CS_ALU_OPCODE_SUB 0x101
#define CS_ALU_OPCODE_AND 0x102
#define CS_ALU_OPCODE_OR 0x103
#define CS_ALU_OPCODE_XOR 0x104
#define CS_ALU_OPCODE_SHL 0x105
#define CS_ALU_OPCODE_SHR 0x106
#define CS_ALU_OPCODE_SAR 0x107
#define CS_ALU_OPCODE_STORE 0x180
#define CS_ALU_OPCODE_STOREINV 0x580
#define CS_ALU_OPCODE_STOREIND 0x181
/* Instruction Operands */
#define CS_ALU_OPERAND_REG(n) REG_FIELD_PREP(GENMASK(3, 0), (n))
#define CS_ALU_OPERAND_REG0 0x0
#define CS_ALU_OPERAND_REG1 0x1
#define CS_ALU_OPERAND_REG2 0x2
#define CS_ALU_OPERAND_REG3 0x3
#define CS_ALU_OPERAND_REG4 0x4
#define CS_ALU_OPERAND_REG5 0x5
#define CS_ALU_OPERAND_REG6 0x6
#define CS_ALU_OPERAND_REG7 0x7
#define CS_ALU_OPERAND_REG8 0x8
#define CS_ALU_OPERAND_REG9 0x9
#define CS_ALU_OPERAND_REG10 0xa
#define CS_ALU_OPERAND_REG11 0xb
#define CS_ALU_OPERAND_REG12 0xc
#define CS_ALU_OPERAND_REG13 0xd
#define CS_ALU_OPERAND_REG14 0xe
#define CS_ALU_OPERAND_REG15 0xf
#define CS_ALU_OPERAND_SRCA 0x20
#define CS_ALU_OPERAND_SRCB 0x21
#define CS_ALU_OPERAND_ACCU 0x31
#define CS_ALU_OPERAND_ZF 0x32
#define CS_ALU_OPERAND_CF 0x33
#define CS_ALU_OPERAND_NA 0 /* N/A operand */
/* Command Streamer ALU Instructions */
#define CS_ALU_INSTR(opcode, op1, op2) (REG_FIELD_PREP(GENMASK(31, 20), (opcode)) | \
REG_FIELD_PREP(GENMASK(19, 10), (op1)) | \
REG_FIELD_PREP(GENMASK(9, 0), (op2)))
#define __CS_ALU_INSTR(opcode, op1, op2) CS_ALU_INSTR(CS_ALU_OPCODE_##opcode, \
CS_ALU_OPERAND_##op1, \
CS_ALU_OPERAND_##op2)
#define CS_ALU_INSTR_NOOP __CS_ALU_INSTR(NOOP, NA, NA)
#define CS_ALU_INSTR_LOAD(op1, op2) __CS_ALU_INSTR(LOAD, op1, op2)
#define CS_ALU_INSTR_LOADINV(op1, op2) __CS_ALU_INSTR(LOADINV, op1, op2)
#define CS_ALU_INSTR_LOAD0(op1) __CS_ALU_INSTR(LOAD0, op1, NA)
#define CS_ALU_INSTR_LOAD1(op1) __CS_ALU_INSTR(LOAD1, op1, NA)
#define CS_ALU_INSTR_ADD __CS_ALU_INSTR(ADD, NA, NA)
#define CS_ALU_INSTR_SUB __CS_ALU_INSTR(SUB, NA, NA)
#define CS_ALU_INSTR_AND __CS_ALU_INSTR(AND, NA, NA)
#define CS_ALU_INSTR_OR __CS_ALU_INSTR(OR, NA, NA)
#define CS_ALU_INSTR_XOR __CS_ALU_INSTR(XOR, NA, NA)
#define CS_ALU_INSTR_STORE(op1, op2) __CS_ALU_INSTR(STORE, op1, op2)
#define CS_ALU_INSTR_STOREINV(op1, op2) __CS_ALU_INSTR(STOREINV, op1, op2)
#endif
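For context, the emit_wa_job() hunk later in this change uses these macros to program a read-modify-write through MI_MATH; a condensed, commented view of that sequence (GPR0 holds the current register value, GPR1 the clear mask, GPR2 the set mask):

/* new = (old & ~clr) | set, computed by the command streamer ALU */
bb->cs[bb->len++] = MI_MATH(8);                       /* 8 ALU instruction dwords follow */
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);    /* SRCA = old value (GPR0) */
bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); /* SRCB = ~clr (GPR1, inverted) */
bb->cs[bb->len++] = CS_ALU_INSTR_AND;                 /* ACCU = SRCA & SRCB */
bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);   /* GPR0 = masked value */
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);    /* SRCA = masked value */
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2);    /* SRCB = set bits (GPR2) */
bb->cs[bb->len++] = CS_ALU_INSTR_OR;                  /* ACCU = SRCA | SRCB */
bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);   /* GPR0 = final value */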

View File

@ -137,6 +137,7 @@
#define CMD_3DSTATE_CLIP_MESH GFXPIPE_3D_CMD(0x0, 0x81)
#define CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82)
#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83)
#define CMD_3DSTATE_COARSE_PIXEL GFXPIPE_3D_CMD(0x0, 0x89)
#define CMD_3DSTATE_DRAWING_RECTANGLE GFXPIPE_3D_CMD(0x1, 0x0)
#define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4)

View File

@ -32,6 +32,7 @@
#define MI_BATCH_BUFFER_END __MI_INSTR(0xA)
#define MI_TOPOLOGY_FILTER __MI_INSTR(0xD)
#define MI_FORCE_WAKEUP __MI_INSTR(0x1D)
#define MI_MATH(n) (__MI_INSTR(0x1A) | XE_INSTR_NUM_DW((n) + 1))
#define MI_STORE_DATA_IMM __MI_INSTR(0x20)
#define MI_SDI_GGTT REG_BIT(22)
@ -61,6 +62,10 @@
#define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
#define MI_LRM_USE_GGTT REG_BIT(22)
#define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3))
#define MI_LRR_DST_CS_MMIO REG_BIT(19)
#define MI_LRR_SRC_CS_MMIO REG_BIT(18)
#define MI_COPY_MEM_MEM (__MI_INSTR(0x2e) | XE_INSTR_NUM_DW(5))
#define MI_COPY_MEM_MEM_SRC_GGTT REG_BIT(22)
#define MI_COPY_MEM_MEM_DST_GGTT REG_BIT(21)

View File

@ -188,6 +188,10 @@
#define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1)
#define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0)
#define CS_GPR_DATA(base, n) XE_REG((base) + 0x600 + (n) * 4)
#define CS_GPR_REG(base, n) CS_GPR_DATA((base), (n) * 2)
#define CS_GPR_REG_UDW(base, n) CS_GPR_DATA((base), (n) * 2 + 1)
#define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08)
#define CG3DDISHRS_CLKGATE_DIS REG_BIT(5)

View File

@ -62,7 +62,6 @@
#define LE_SSE_MASK REG_GENMASK(18, 17)
#define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value)
#define LE_COS_MASK REG_GENMASK(16, 15)
#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK)
#define LE_SCF_MASK REG_BIT(14)
#define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value)
#define LE_PFM_MASK REG_GENMASK(13, 11)
@ -392,6 +391,18 @@
#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4)
#define XEHP_LNESPARE REG_BIT(19)
#define LSN_VC_REG2 XE_REG_MCR(0xb0c8)
#define LSN_LNI_WGT_MASK REG_GENMASK(31, 28)
#define LSN_LNI_WGT(value) REG_FIELD_PREP(LSN_LNI_WGT_MASK, value)
#define LSN_LNE_WGT_MASK REG_GENMASK(27, 24)
#define LSN_LNE_WGT(value) REG_FIELD_PREP(LSN_LNE_WGT_MASK, value)
#define LSN_DIM_X_WGT_MASK REG_GENMASK(23, 20)
#define LSN_DIM_X_WGT(value) REG_FIELD_PREP(LSN_DIM_X_WGT_MASK, value)
#define LSN_DIM_Y_WGT_MASK REG_GENMASK(19, 16)
#define LSN_DIM_Y_WGT(value) REG_FIELD_PREP(LSN_DIM_Y_WGT_MASK, value)
#define LSN_DIM_Z_WGT_MASK REG_GENMASK(15, 12)
#define LSN_DIM_Z_WGT(value) REG_FIELD_PREP(LSN_DIM_Z_WGT_MASK, value)
#define L3SQCREG2 XE_REG_MCR(0xb104)
#define COMPMEMRD256BOVRFETCHEN REG_BIT(20)

View File

@ -21,6 +21,9 @@
#define BMG_PACKAGE_POWER_SKU XE_REG(0x138098)
#define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc)
#define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120)
#define BMG_FAN_1_SPEED XE_REG(0x138140)
#define BMG_FAN_2_SPEED XE_REG(0x138170)
#define BMG_FAN_3_SPEED XE_REG(0x1381a0)
#define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0)
#define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434)
#define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440)

View File

@ -252,7 +252,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
for_each_gt(__gt, xe, id)
xe_gt_sanitize(__gt);
err = xe_bo_restore_kernel(xe);
err = xe_bo_restore_early(xe);
/*
* Snapshotting the CTB and copying back a potentially old
* version seems risky, depending on what might have been
@ -273,7 +273,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
goto cleanup_all;
}
err = xe_bo_restore_user(xe);
err = xe_bo_restore_late(xe);
if (err) {
KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
goto cleanup_all;

View File

@ -202,8 +202,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_PINNED);
XE_BO_FLAG_VRAM_IF_DGFX(tile));
if (IS_ERR(big)) {
KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
goto vunmap;
@ -211,8 +210,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_PINNED);
XE_BO_FLAG_VRAM_IF_DGFX(tile));
if (IS_ERR(pt)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
PTR_ERR(pt));
@ -222,8 +220,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
2 * SZ_4K,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_PINNED);
XE_BO_FLAG_VRAM_IF_DGFX(tile));
if (IS_ERR(tiny)) {
KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
PTR_ERR(tiny));

View File

@ -55,6 +55,8 @@ static struct ttm_placement sys_placement = {
.placement = &sys_placement_flags,
};
static struct ttm_placement purge_placement;
static const struct ttm_place tt_placement_flags[] = {
{
.fpfn = 0,
@ -189,11 +191,18 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
static bool force_contiguous(u32 bo_flags)
{
if (bo_flags & XE_BO_FLAG_STOLEN)
return true; /* users expect this */
else if (bo_flags & XE_BO_FLAG_PINNED &&
!(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
return true; /* needs vmap */
/*
* For eviction / restore on suspend / resume objects pinned in VRAM
* must be contiguous, also only contiguous BOs support xe_bo_vmap.
*/
return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
bo_flags & XE_BO_FLAG_PINNED;
}
static void add_vram(struct xe_device *xe, struct xe_bo *bo,
@ -281,6 +290,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
static void xe_evict_flags(struct ttm_buffer_object *tbo,
struct ttm_placement *placement)
{
struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
struct xe_bo *bo;
if (!xe_bo_is_xe_bo(tbo)) {
@ -290,7 +301,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
return;
}
*placement = sys_placement;
*placement = device_unplugged ? purge_placement : sys_placement;
return;
}
@ -300,6 +311,11 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
return;
}
if (device_unplugged && !tbo->base.dma_buf) {
*placement = purge_placement;
return;
}
/*
* For xe, sg bos that are evicted to system just triggers a
* rebind of the sg list upon subsequent validation to XE_PL_TT.
@ -657,11 +673,20 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
ttm);
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
struct sg_table *sg;
xe_assert(xe, attach);
xe_assert(xe, ttm_bo->ttm);
if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
ttm_bo->sg) {
dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
false, MAX_SCHEDULE_TIMEOUT);
dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
ttm_bo->sg = NULL;
}
if (new_res->mem_type == XE_PL_SYSTEM)
goto out;
@ -898,79 +923,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
xe_pm_runtime_get_noresume(xe);
}
if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
/*
* Kernel memory that is pinned should only be moved on suspend
* / resume, some of the pinned memory is required for the
* device to resume / use the GPU to move other evicted memory
* (user memory) around. This likely could be optimized a bit
* further where we find the minimum set of pinned memory
* required for resume but for simplity doing a memcpy for all
* pinned memory.
*/
ret = xe_bo_vmap(bo);
if (!ret) {
ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
if (move_lacks_source) {
u32 flags = 0;
/* Create a new VMAP once kernel BO back in VRAM */
if (!ret && resource_is_vram(new_mem)) {
struct xe_vram_region *vram = res_to_mem_region(new_mem);
void __iomem *new_addr = vram->mapping +
(new_mem->start << PAGE_SHIFT);
if (mem_type_is_vram(new_mem->mem_type))
flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
else if (handle_system_ccs)
flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
ret = -EINVAL;
xe_pm_runtime_put(xe);
goto out;
}
xe_assert(xe, new_mem->start ==
bo->placements->fpfn);
iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
}
fence = xe_migrate_clear(migrate, bo, new_mem, flags);
} else {
fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
handle_system_ccs);
}
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
xe_pm_runtime_put(xe);
goto out;
}
if (!move_lacks_source) {
ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
new_mem);
if (ret) {
dma_fence_wait(fence, false);
ttm_bo_move_null(ttm_bo, new_mem);
ret = 0;
}
} else {
if (move_lacks_source) {
u32 flags = 0;
if (mem_type_is_vram(new_mem->mem_type))
flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
else if (handle_system_ccs)
flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
fence = xe_migrate_clear(migrate, bo, new_mem, flags);
}
else
fence = xe_migrate_copy(migrate, bo, bo, old_mem,
new_mem, handle_system_ccs);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
xe_pm_runtime_put(xe);
goto out;
}
if (!move_lacks_source) {
ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
true, new_mem);
if (ret) {
dma_fence_wait(fence, false);
ttm_bo_move_null(ttm_bo, new_mem);
ret = 0;
}
} else {
/*
* ttm_bo_move_accel_cleanup() may blow up if
* bo->resource == NULL, so just attach the
* fence and set the new resource.
*/
dma_resv_add_fence(ttm_bo->base.resv, fence,
DMA_RESV_USAGE_KERNEL);
ttm_bo_move_null(ttm_bo, new_mem);
}
dma_fence_put(fence);
/*
* ttm_bo_move_accel_cleanup() may blow up if
* bo->resource == NULL, so just attach the
* fence and set the new resource.
*/
dma_resv_add_fence(ttm_bo->base.resv, fence,
DMA_RESV_USAGE_KERNEL);
ttm_bo_move_null(ttm_bo, new_mem);
}
dma_fence_put(fence);
xe_pm_runtime_put(xe);
out:
@ -1107,59 +1097,93 @@ out_unref:
*/
int xe_bo_evict_pinned(struct xe_bo *bo)
{
struct ttm_place place = {
.mem_type = XE_PL_TT,
};
struct ttm_placement placement = {
.placement = &place,
.num_placement = 1,
};
struct ttm_operation_ctx ctx = {
.interruptible = false,
.gfp_retry_mayfail = true,
};
struct ttm_resource *new_mem;
int ret;
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
struct xe_bo *backup;
bool unmap = false;
int ret = 0;
xe_bo_assert_held(bo);
xe_bo_lock(bo, false);
if (WARN_ON(!bo->ttm.resource))
return -EINVAL;
if (WARN_ON(!xe_bo_is_pinned(bo)))
return -EINVAL;
if (!xe_bo_is_vram(bo))
return 0;
ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
if (ret)
return ret;
if (!bo->ttm.ttm) {
bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
if (!bo->ttm.ttm) {
ret = -ENOMEM;
goto err_res_free;
}
if (WARN_ON(!bo->ttm.resource)) {
ret = -EINVAL;
goto out_unlock_bo;
}
ret = ttm_bo_populate(&bo->ttm, &ctx);
if (WARN_ON(!xe_bo_is_pinned(bo))) {
ret = -EINVAL;
goto out_unlock_bo;
}
if (!xe_bo_is_vram(bo))
goto out_unlock_bo;
if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
goto out_unlock_bo;
backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
XE_BO_FLAG_PINNED);
if (IS_ERR(backup)) {
ret = PTR_ERR(backup);
goto out_unlock_bo;
}
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
struct xe_migrate *migrate;
struct dma_fence *fence;
if (bo->tile)
migrate = bo->tile->migrate;
else
migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
if (ret)
goto out_backup;
ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
if (ret)
goto out_backup;
fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
backup->ttm.resource, false);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
goto out_backup;
}
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
dma_resv_add_fence(backup->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
if (ret)
goto out_backup;
if (iosys_map_is_null(&bo->vmap)) {
ret = xe_bo_vmap(bo);
if (ret)
goto out_backup;
unmap = true;
}
xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
bo->size);
}
bo->backup_obj = backup;
out_backup:
xe_bo_vunmap(backup);
xe_bo_unlock(backup);
if (ret)
goto err_res_free;
ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
if (ret)
goto err_res_free;
ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
if (ret)
goto err_res_free;
return 0;
err_res_free:
ttm_resource_free(&bo->ttm, &new_mem);
xe_bo_put(backup);
out_unlock_bo:
if (unmap)
xe_bo_vunmap(bo);
xe_bo_unlock(bo);
return ret;
}
@ -1180,48 +1204,108 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
.interruptible = false,
.gfp_retry_mayfail = false,
};
struct ttm_resource *new_mem;
struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
struct xe_bo *backup = bo->backup_obj;
bool unmap = false;
int ret;
xe_bo_assert_held(bo);
if (WARN_ON(!bo->ttm.resource))
return -EINVAL;
if (WARN_ON(!xe_bo_is_pinned(bo)))
return -EINVAL;
if (WARN_ON(xe_bo_is_vram(bo)))
return -EINVAL;
if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
return -EINVAL;
if (!mem_type_is_vram(place->mem_type))
if (!backup)
return 0;
ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
if (ret)
return ret;
xe_bo_lock(backup, false);
ret = ttm_bo_populate(&bo->ttm, &ctx);
ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
if (ret)
goto err_res_free;
goto out_backup;
ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
if (ret)
goto err_res_free;
if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
ret = -EBUSY;
goto out_backup;
}
ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
if (ret)
goto err_res_free;
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
struct xe_migrate *migrate;
struct dma_fence *fence;
if (bo->tile)
migrate = bo->tile->migrate;
else
migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
if (ret)
goto out_unlock_bo;
ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
if (ret)
goto out_unlock_bo;
fence = xe_migrate_copy(migrate, backup, bo,
backup->ttm.resource, bo->ttm.resource,
false);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
goto out_unlock_bo;
}
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
dma_resv_add_fence(backup->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
if (ret)
goto out_unlock_bo;
if (iosys_map_is_null(&bo->vmap)) {
ret = xe_bo_vmap(bo);
if (ret)
goto out_unlock_bo;
unmap = true;
}
xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
bo->size);
}
bo->backup_obj = NULL;
out_unlock_bo:
if (unmap)
xe_bo_vunmap(bo);
xe_bo_unlock(bo);
out_backup:
xe_bo_vunmap(backup);
xe_bo_unlock(backup);
if (!bo->backup_obj)
xe_bo_put(backup);
return ret;
}
int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
{
struct ttm_buffer_object *ttm_bo = &bo->ttm;
struct ttm_tt *tt = ttm_bo->ttm;
if (tt) {
struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
dma_buf_unmap_attachment(ttm_bo->base.import_attach,
ttm_bo->sg,
DMA_BIDIRECTIONAL);
ttm_bo->sg = NULL;
xe_tt->sg = NULL;
} else if (xe_tt->sg) {
dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
DMA_BIDIRECTIONAL, 0);
sg_free_table(xe_tt->sg);
xe_tt->sg = NULL;
}
}
return 0;
err_res_free:
ttm_resource_free(&bo->ttm, &new_mem);
return ret;
}
static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
@ -1947,7 +2031,7 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
flags |= XE_BO_FLAG_GGTT;
bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
alignment);
if (IS_ERR(bo))
return bo;
@ -2049,7 +2133,8 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
struct xe_bo *bo;
u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
xe_assert(xe, IS_DGFX(xe));
xe_assert(xe, !(*src)->vmap.is_iomem);
@ -2073,10 +2158,16 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
{
struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
if (res->mem_type == XE_PL_STOLEN)
switch (res->mem_type) {
case XE_PL_STOLEN:
return xe_ttm_stolen_gpu_offset(xe);
return res_to_mem_region(res)->dpa_base;
case XE_PL_TT:
case XE_PL_SYSTEM:
return 0;
default:
return res_to_mem_region(res)->dpa_base;
}
return 0;
}
/**
@ -2102,12 +2193,9 @@ int xe_bo_pin_external(struct xe_bo *bo)
if (err)
return err;
if (xe_bo_is_vram(bo)) {
spin_lock(&xe->pinned.lock);
list_add_tail(&bo->pinned_link,
&xe->pinned.external_vram);
spin_unlock(&xe->pinned.lock);
}
spin_lock(&xe->pinned.lock);
list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
spin_unlock(&xe->pinned.lock);
}
ttm_bo_pin(&bo->ttm);
@ -2149,25 +2237,12 @@ int xe_bo_pin(struct xe_bo *bo)
if (err)
return err;
/*
* For pinned objects in on DGFX, which are also in vram, we expect
* these to be in contiguous VRAM memory. Required eviction / restore
* during suspend / resume (force restore to same physical address).
*/
if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
if (mem_type_is_vram(place->mem_type)) {
xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
}
}
if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
spin_lock(&xe->pinned.lock);
list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
else
list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
spin_unlock(&xe->pinned.lock);
}

View File

@ -39,20 +39,23 @@
#define XE_BO_FLAG_NEEDS_64K BIT(15)
#define XE_BO_FLAG_NEEDS_2M BIT(16)
#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17)
#define XE_BO_FLAG_GGTT0 BIT(18)
#define XE_BO_FLAG_GGTT1 BIT(19)
#define XE_BO_FLAG_GGTT2 BIT(20)
#define XE_BO_FLAG_GGTT3 BIT(21)
#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \
XE_BO_FLAG_GGTT1 | \
XE_BO_FLAG_GGTT2 | \
XE_BO_FLAG_GGTT3)
#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(22)
#define XE_BO_FLAG_PINNED_NORESTORE BIT(18)
#define XE_BO_FLAG_PINNED_LATE_RESTORE BIT(19)
#define XE_BO_FLAG_GGTT0 BIT(20)
#define XE_BO_FLAG_GGTT1 BIT(21)
#define XE_BO_FLAG_GGTT2 BIT(22)
#define XE_BO_FLAG_GGTT3 BIT(23)
#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24)
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
#define XE_BO_FLAG_INTERNAL_64K BIT(31)
#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \
XE_BO_FLAG_GGTT1 | \
XE_BO_FLAG_GGTT2 | \
XE_BO_FLAG_GGTT3)
#define XE_BO_FLAG_GGTTx(tile) \
(XE_BO_FLAG_GGTT0 << (tile)->id)
@ -276,6 +279,8 @@ int xe_bo_evict(struct xe_bo *bo);
int xe_bo_evict_pinned(struct xe_bo *bo);
int xe_bo_restore_pinned(struct xe_bo *bo);
int xe_bo_dma_unmap_pinned(struct xe_bo *bo);
extern const struct ttm_device_funcs xe_ttm_funcs;
extern const char *const xe_mem_type_to_name[];

View File

@ -10,6 +10,42 @@
#include "xe_ggtt.h"
#include "xe_tile.h"
typedef int (*xe_pinned_fn)(struct xe_bo *bo);
static int xe_bo_apply_to_pinned(struct xe_device *xe,
struct list_head *pinned_list,
struct list_head *new_list,
const xe_pinned_fn pinned_fn)
{
LIST_HEAD(still_in_list);
struct xe_bo *bo;
int ret = 0;
spin_lock(&xe->pinned.lock);
while (!ret) {
bo = list_first_entry_or_null(pinned_list, typeof(*bo),
pinned_link);
if (!bo)
break;
xe_bo_get(bo);
list_move_tail(&bo->pinned_link, &still_in_list);
spin_unlock(&xe->pinned.lock);
ret = pinned_fn(bo);
if (ret && pinned_list != new_list) {
spin_lock(&xe->pinned.lock);
list_move(&bo->pinned_link, pinned_list);
spin_unlock(&xe->pinned.lock);
}
xe_bo_put(bo);
spin_lock(&xe->pinned.lock);
}
list_splice_tail(&still_in_list, new_list);
spin_unlock(&xe->pinned.lock);
return ret;
}
/**
* xe_bo_evict_all - evict all BOs from VRAM
*
@ -27,9 +63,7 @@
int xe_bo_evict_all(struct xe_device *xe)
{
struct ttm_device *bdev = &xe->ttm;
struct xe_bo *bo;
struct xe_tile *tile;
struct list_head still_in_list;
u32 mem_type;
u8 id;
int ret;
@ -57,34 +91,13 @@ int xe_bo_evict_all(struct xe_device *xe)
}
}
/* Pinned user memory in VRAM */
INIT_LIST_HEAD(&still_in_list);
spin_lock(&xe->pinned.lock);
for (;;) {
bo = list_first_entry_or_null(&xe->pinned.external_vram,
typeof(*bo), pinned_link);
if (!bo)
break;
xe_bo_get(bo);
list_move_tail(&bo->pinned_link, &still_in_list);
spin_unlock(&xe->pinned.lock);
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
&xe->pinned.late.external,
xe_bo_evict_pinned);
xe_bo_lock(bo, false);
ret = xe_bo_evict_pinned(bo);
xe_bo_unlock(bo);
xe_bo_put(bo);
if (ret) {
spin_lock(&xe->pinned.lock);
list_splice_tail(&still_in_list,
&xe->pinned.external_vram);
spin_unlock(&xe->pinned.lock);
return ret;
}
spin_lock(&xe->pinned.lock);
}
list_splice_tail(&still_in_list, &xe->pinned.external_vram);
spin_unlock(&xe->pinned.lock);
if (!ret)
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
&xe->pinned.late.evicted, xe_bo_evict_pinned);
/*
* Wait for all user BO to be evicted as those evictions depend on the
@ -93,32 +106,49 @@ int xe_bo_evict_all(struct xe_device *xe)
for_each_tile(tile, xe, id)
xe_tile_migrate_wait(tile);
spin_lock(&xe->pinned.lock);
for (;;) {
bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
typeof(*bo), pinned_link);
if (!bo)
break;
xe_bo_get(bo);
list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
spin_unlock(&xe->pinned.lock);
if (ret)
return ret;
xe_bo_lock(bo, false);
ret = xe_bo_evict_pinned(bo);
xe_bo_unlock(bo);
xe_bo_put(bo);
if (ret)
return ret;
return xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present,
&xe->pinned.early.evicted,
xe_bo_evict_pinned);
}
spin_lock(&xe->pinned.lock);
static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
int ret;
ret = xe_bo_restore_pinned(bo);
if (ret)
return ret;
if (bo->flags & XE_BO_FLAG_GGTT) {
struct xe_tile *tile;
u8 id;
for_each_tile(tile, xe_bo_device(bo), id) {
if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
continue;
mutex_lock(&tile->mem.ggtt->lock);
xe_ggtt_map_bo(tile->mem.ggtt, bo);
mutex_unlock(&tile->mem.ggtt->lock);
}
}
spin_unlock(&xe->pinned.lock);
/*
* We expect validate to trigger a move VRAM and our move code
* should setup the iosys map.
*/
xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) ||
!iosys_map_is_null(&bo->vmap));
return 0;
}
/**
* xe_bo_restore_kernel - restore kernel BOs to VRAM
* xe_bo_restore_early - restore early phase kernel BOs to VRAM
*
* @xe: xe device
*
@ -128,111 +158,130 @@ int xe_bo_evict_all(struct xe_device *xe)
* This function should be called early, before trying to init the GT, on device
* resume.
*/
int xe_bo_restore_kernel(struct xe_device *xe)
int xe_bo_restore_early(struct xe_device *xe)
{
struct xe_bo *bo;
int ret;
spin_lock(&xe->pinned.lock);
for (;;) {
bo = list_first_entry_or_null(&xe->pinned.evicted,
typeof(*bo), pinned_link);
if (!bo)
break;
xe_bo_get(bo);
list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
spin_unlock(&xe->pinned.lock);
xe_bo_lock(bo, false);
ret = xe_bo_restore_pinned(bo);
xe_bo_unlock(bo);
if (ret) {
xe_bo_put(bo);
return ret;
}
if (bo->flags & XE_BO_FLAG_GGTT) {
struct xe_tile *tile;
u8 id;
for_each_tile(tile, xe, id) {
if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
continue;
mutex_lock(&tile->mem.ggtt->lock);
xe_ggtt_map_bo(tile->mem.ggtt, bo);
mutex_unlock(&tile->mem.ggtt->lock);
}
}
/*
* We expect validate to trigger a move VRAM and our move code
* should setup the iosys map.
*/
xe_assert(xe, !iosys_map_is_null(&bo->vmap));
xe_bo_put(bo);
spin_lock(&xe->pinned.lock);
}
spin_unlock(&xe->pinned.lock);
return 0;
return xe_bo_apply_to_pinned(xe, &xe->pinned.early.evicted,
&xe->pinned.early.kernel_bo_present,
xe_bo_restore_and_map_ggtt);
}
/**
* xe_bo_restore_user - restore pinned user BOs to VRAM
* xe_bo_restore_late - restore pinned late phase BOs
*
* @xe: xe device
*
* Move pinned user BOs from temporary (typically system) memory to VRAM via
* CPU. All moves done via TTM calls.
* Move pinned user and kernel BOs which can use blitter from temporary
* (typically system) memory to VRAM. All moves done via TTM calls.
*
* This function should be called late, after GT init, on device resume.
*/
int xe_bo_restore_user(struct xe_device *xe)
int xe_bo_restore_late(struct xe_device *xe)
{
struct xe_bo *bo;
struct xe_tile *tile;
struct list_head still_in_list;
u8 id;
int ret;
int ret, id;
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.evicted,
&xe->pinned.late.kernel_bo_present,
xe_bo_restore_and_map_ggtt);
for_each_tile(tile, xe, id)
xe_tile_migrate_wait(tile);
if (ret)
return ret;
if (!IS_DGFX(xe))
return 0;
/* Pinned user memory in VRAM should be validated on resume */
INIT_LIST_HEAD(&still_in_list);
spin_lock(&xe->pinned.lock);
for (;;) {
bo = list_first_entry_or_null(&xe->pinned.external_vram,
typeof(*bo), pinned_link);
if (!bo)
break;
list_move_tail(&bo->pinned_link, &still_in_list);
xe_bo_get(bo);
spin_unlock(&xe->pinned.lock);
xe_bo_lock(bo, false);
ret = xe_bo_restore_pinned(bo);
xe_bo_unlock(bo);
xe_bo_put(bo);
if (ret) {
spin_lock(&xe->pinned.lock);
list_splice_tail(&still_in_list,
&xe->pinned.external_vram);
spin_unlock(&xe->pinned.lock);
return ret;
}
spin_lock(&xe->pinned.lock);
}
list_splice_tail(&still_in_list, &xe->pinned.external_vram);
spin_unlock(&xe->pinned.lock);
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
&xe->pinned.late.external,
xe_bo_restore_pinned);
/* Wait for restore to complete */
for_each_tile(tile, xe, id)
xe_tile_migrate_wait(tile);
return 0;
return ret;
}
static void xe_bo_pci_dev_remove_pinned(struct xe_device *xe)
{
struct xe_tile *tile;
unsigned int id;
(void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
&xe->pinned.late.external,
xe_bo_dma_unmap_pinned);
for_each_tile(tile, xe, id)
xe_tile_migrate_wait(tile);
}
/**
* xe_bo_pci_dev_remove_all() - Handle bos when the pci_device is about to be removed
* @xe: The xe device.
*
* On pci_device removal we need to drop all dma mappings and move
* the data of exported bos out to system. This includes SVM bos and
* exported dma-buf bos. This is done by evicting all bos, but
* the evict placement in xe_evict_flags() is chosen such that all
* bos except those mentioned are purged, and thus their memory
* is released.
*
* For pinned bos, we're unmapping dma.
*/
void xe_bo_pci_dev_remove_all(struct xe_device *xe)
{
unsigned int mem_type;
/*
* Move pagemap bos and exported dma-buf to system, and
* purge everything else.
*/
for (mem_type = XE_PL_VRAM1; mem_type >= XE_PL_TT; --mem_type) {
struct ttm_resource_manager *man =
ttm_manager_type(&xe->ttm, mem_type);
if (man) {
int ret = ttm_resource_manager_evict_all(&xe->ttm, man);
drm_WARN_ON(&xe->drm, ret);
}
}
xe_bo_pci_dev_remove_pinned(xe);
}
static void xe_bo_pinned_fini(void *arg)
{
struct xe_device *xe = arg;
(void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
&xe->pinned.late.kernel_bo_present,
xe_bo_dma_unmap_pinned);
(void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present,
&xe->pinned.early.kernel_bo_present,
xe_bo_dma_unmap_pinned);
}
/**
* xe_bo_pinned_init() - Initialize pinned bo tracking
* @xe: The xe device.
*
* Initializes the lists and locks required for pinned bo
* tracking and registers a callback to dma-unmap
* any remaining pinned bos on pci device removal.
*
* Return: %0 on success, negative error code on error.
*/
int xe_bo_pinned_init(struct xe_device *xe)
{
spin_lock_init(&xe->pinned.lock);
INIT_LIST_HEAD(&xe->pinned.early.kernel_bo_present);
INIT_LIST_HEAD(&xe->pinned.early.evicted);
INIT_LIST_HEAD(&xe->pinned.late.kernel_bo_present);
INIT_LIST_HEAD(&xe->pinned.late.evicted);
INIT_LIST_HEAD(&xe->pinned.late.external);
return devm_add_action_or_reset(xe->drm.dev, xe_bo_pinned_fini, xe);
}

View File

@ -9,7 +9,10 @@
struct xe_device;
int xe_bo_evict_all(struct xe_device *xe);
int xe_bo_restore_kernel(struct xe_device *xe);
int xe_bo_restore_user(struct xe_device *xe);
int xe_bo_restore_early(struct xe_device *xe);
int xe_bo_restore_late(struct xe_device *xe);
void xe_bo_pci_dev_remove_all(struct xe_device *xe);
int xe_bo_pinned_init(struct xe_device *xe);
#endif
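A rough sketch of the intended ordering after the rename (illustrative only; the hypothetical example_* functions below stand in for the driver's real suspend/resume paths, which are not part of this hunk):

static int example_suspend(struct xe_device *xe)
{
	/* Evicts VRAM, including both early- and late-restore pinned BOs. */
	return xe_bo_evict_all(xe);
}

static int example_resume(struct xe_device *xe)
{
	int err;

	/* Early phase: kernel BOs needed before the GT/GuC come back up. */
	err = xe_bo_restore_early(xe);
	if (err)
		return err;

	/* ... bring the GT, GuC and migration engine back online ... */

	/* Late phase: pinned user BOs and blitter-capable kernel BOs. */
	return xe_bo_restore_late(xe);
}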

View File

@ -28,6 +28,8 @@ struct xe_vm;
struct xe_bo {
/** @ttm: TTM base buffer object */
struct ttm_buffer_object ttm;
/** @backup_obj: The backup object when pinned and suspended (vram only) */
struct xe_bo *backup_obj;
/** @size: Size of this buffer object */
size_t size;
/** @flags: flags for this buffer object */

View File

@ -0,0 +1,250 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2025 Intel Corporation
*/
#include <linux/configfs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "xe_configfs.h"
#include "xe_module.h"
/**
* DOC: Xe Configfs
*
* Overview
* =========
*
* Configfs is a filesystem-based manager of kernel objects. XE KMD registers a
* configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory
* The user can create devices under this directory and configure them as necessary
* See Documentation/filesystems/configfs.rst for more information about how configfs works.
*
* Create devices
* ===============
*
* In order to create a device, the user has to create a directory inside ``'xe'``::
*
* mkdir /sys/kernel/config/xe/0000:03:00.0/
*
* Every device created is populated by the driver with entries that can be
* used to configure it::
*
* /sys/kernel/config/xe/
* .. 0000:03:00.0/
* ... survivability_mode
*
* Configure Attributes
* ====================
*
* Survivability mode:
* -------------------
*
* Enable survivability mode on supported cards. This setting only takes
* effect when probing the device. Example to enable it::
*
* # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
* # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported)
*
* Remove devices
* ==============
*
* The created device directories can be removed using ``rmdir``::
*
* rmdir /sys/kernel/config/xe/0000:03:00.0/
*/
struct xe_config_device {
struct config_group group;
bool survivability_mode;
/* protects attributes */
struct mutex lock;
};
static struct xe_config_device *to_xe_config_device(struct config_item *item)
{
return container_of(to_config_group(item), struct xe_config_device, group);
}
static ssize_t survivability_mode_show(struct config_item *item, char *page)
{
struct xe_config_device *dev = to_xe_config_device(item);
return sprintf(page, "%d\n", dev->survivability_mode);
}
static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len)
{
struct xe_config_device *dev = to_xe_config_device(item);
bool survivability_mode;
int ret;
ret = kstrtobool(page, &survivability_mode);
if (ret)
return ret;
mutex_lock(&dev->lock);
dev->survivability_mode = survivability_mode;
mutex_unlock(&dev->lock);
return len;
}
CONFIGFS_ATTR(, survivability_mode);
static struct configfs_attribute *xe_config_device_attrs[] = {
&attr_survivability_mode,
NULL,
};
static void xe_config_device_release(struct config_item *item)
{
struct xe_config_device *dev = to_xe_config_device(item);
mutex_destroy(&dev->lock);
kfree(dev);
}
static struct configfs_item_operations xe_config_device_ops = {
.release = xe_config_device_release,
};
static const struct config_item_type xe_config_device_type = {
.ct_item_ops = &xe_config_device_ops,
.ct_attrs = xe_config_device_attrs,
.ct_owner = THIS_MODULE,
};
static struct config_group *xe_config_make_device_group(struct config_group *group,
const char *name)
{
unsigned int domain, bus, slot, function;
struct xe_config_device *dev;
struct pci_dev *pdev;
int ret;
ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function);
if (ret != 4)
return ERR_PTR(-EINVAL);
pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function));
if (!pdev)
return ERR_PTR(-EINVAL);
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return ERR_PTR(-ENOMEM);
config_group_init_type_name(&dev->group, name, &xe_config_device_type);
mutex_init(&dev->lock);
return &dev->group;
}
static struct configfs_group_operations xe_config_device_group_ops = {
.make_group = xe_config_make_device_group,
};
static const struct config_item_type xe_configfs_type = {
.ct_group_ops = &xe_config_device_group_ops,
.ct_owner = THIS_MODULE,
};
static struct configfs_subsystem xe_configfs = {
.su_group = {
.cg_item = {
.ci_namebuf = "xe",
.ci_type = &xe_configfs_type,
},
},
};
static struct xe_config_device *configfs_find_group(struct pci_dev *pdev)
{
struct config_item *item;
char name[64];
snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus),
pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
mutex_lock(&xe_configfs.su_mutex);
item = config_group_find_item(&xe_configfs.su_group, name);
mutex_unlock(&xe_configfs.su_mutex);
if (!item)
return NULL;
return to_xe_config_device(item);
}
/**
* xe_configfs_get_survivability_mode - get configfs survivability mode attribute
* @pdev: pci device
*
* find the configfs group that belongs to the pci device and return
* the survivability mode attribute
*
* Return: survivability mode if config group is found, false otherwise
*/
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev)
{
struct xe_config_device *dev = configfs_find_group(pdev);
bool mode;
if (!dev)
return false;
mode = dev->survivability_mode;
config_item_put(&dev->group.cg_item);
return mode;
}
/**
* xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute
* @pdev: pci device
*
* find the configfs group that belongs to the pci device and clear survivability
* mode attribute
*/
void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
{
struct xe_config_device *dev = configfs_find_group(pdev);
if (!dev)
return;
mutex_lock(&dev->lock);
dev->survivability_mode = 0;
mutex_unlock(&dev->lock);
config_item_put(&dev->group.cg_item);
}
int __init xe_configfs_init(void)
{
struct config_group *root = &xe_configfs.su_group;
int ret;
config_group_init(root);
mutex_init(&xe_configfs.su_mutex);
ret = configfs_register_subsystem(&xe_configfs);
if (ret) {
pr_err("Error %d while registering %s subsystem\n",
ret, root->cg_item.ci_namebuf);
return ret;
}
return 0;
}
void __exit xe_configfs_exit(void)
{
configfs_unregister_subsystem(&xe_configfs);
}

View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2025 Intel Corporation
*/
#ifndef _XE_CONFIGFS_H_
#define _XE_CONFIGFS_H_
#include <linux/types.h>
struct pci_dev;
#if IS_ENABLED(CONFIG_CONFIGFS_FS)
int xe_configfs_init(void);
void xe_configfs_exit(void);
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
#else
static inline int xe_configfs_init(void) { return 0; };
static inline void xe_configfs_exit(void) {};
static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; };
static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {};
#endif
#endif
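A hedged sketch of a driver-side consumer (the example_* function is hypothetical; in this series the real caller is the survivability-mode probe path):

/* Hypothetical consumer of the configfs interface declared above. */
static bool example_wants_survivability(struct pci_dev *pdev)
{
	/* False when no /sys/kernel/config/xe/<bdf>/ directory exists. */
	if (!xe_configfs_get_survivability_mode(pdev))
		return false;

	/*
	 * Assumption for this sketch: the attribute is consumed once
	 * survivability mode is actually entered, so later probes start clean.
	 */
	xe_configfs_clear_survivability_mode(pdev);
	return true;
}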

View File

@ -23,6 +23,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_debugfs.h"
#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
@ -467,10 +468,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xa_erase(&xe->usm.asid_to_vm, asid);
}
spin_lock_init(&xe->pinned.lock);
INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted);
err = xe_bo_pinned_init(xe);
if (err)
goto err;
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
WQ_MEM_RECLAIM);
@ -505,7 +505,15 @@ ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
static bool xe_driver_flr_disabled(struct xe_device *xe)
{
return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
if (IS_SRIOV_VF(xe))
return true;
if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
drm_info(&xe->drm, "Driver-FLR disabled by BIOS\n");
return true;
}
return false;
}
/*
@ -523,7 +531,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe)
*/
static void __xe_driver_flr(struct xe_device *xe)
{
const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
const unsigned int flr_timeout = 3 * USEC_PER_SEC; /* specs recommend a 3s wait */
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
int ret;
@ -569,10 +577,8 @@ static void __xe_driver_flr(struct xe_device *xe)
static void xe_driver_flr(struct xe_device *xe)
{
if (xe_driver_flr_disabled(xe)) {
drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
if (xe_driver_flr_disabled(xe))
return;
}
__xe_driver_flr(xe);
}
@ -706,7 +712,7 @@ int xe_device_probe_early(struct xe_device *xe)
sriov_update_device_info(xe);
err = xe_pcode_probe_early(xe);
if (err) {
if (err || xe_survivability_mode_is_requested(xe)) {
int save_err = err;
/*
@ -729,6 +735,7 @@ int xe_device_probe_early(struct xe_device *xe)
return 0;
}
ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */
static int probe_has_flat_ccs(struct xe_device *xe)
{
@ -932,6 +939,8 @@ void xe_device_remove(struct xe_device *xe)
xe_display_unregister(xe);
drm_dev_unplug(&xe->drm);
xe_bo_pci_dev_remove_all(xe);
}
void xe_device_shutdown(struct xe_device *xe)

View File

@ -107,6 +107,9 @@ struct xe_vram_region {
resource_size_t actual_physical_size;
/** @mapping: pointer to VRAM mappable space */
void __iomem *mapping;
/** @ttm: VRAM TTM manager */
struct xe_ttm_vram_mgr ttm;
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
/** @pagemap: Used to remap device memory as ZONE_DEVICE */
struct dev_pagemap pagemap;
/**
@ -120,8 +123,7 @@ struct xe_vram_region {
* This is generated when remap device memory as ZONE_DEVICE
*/
resource_size_t hpa_base;
/** @ttm: VRAM TTM manager */
struct xe_ttm_vram_mgr ttm;
#endif
};
/**
@ -314,6 +316,8 @@ struct xe_device {
u8 has_atomic_enable_pte_bit:1;
/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
u8 has_device_atomics_on_smem:1;
/** @info.has_fan_control: Device supports fan control */
u8 has_fan_control:1;
/** @info.has_flat_ccs: Whether flat CCS metadata is used */
u8 has_flat_ccs:1;
/** @info.has_heci_cscfi: device has heci cscfi */
@ -332,6 +336,8 @@ struct xe_device {
u8 has_usm:1;
/** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
/** @info.needs_scratch: needs scratch page for oob prefetch to work */
u8 needs_scratch:1;
/**
* @info.probe_display: Probe display hardware. If set to
* false, the driver will behave as if there is no display
@ -418,12 +424,22 @@ struct xe_device {
struct {
/** @pinned.lock: protected pinned BO list state */
spinlock_t lock;
/** @pinned.kernel_bo_present: pinned kernel BO that are present */
struct list_head kernel_bo_present;
/** @pinned.evicted: pinned BO that have been evicted */
struct list_head evicted;
/** @pinned.external_vram: pinned external BO in vram*/
struct list_head external_vram;
/** @pinned.early: early pinned lists */
struct {
/** @pinned.early.kernel_bo_present: pinned kernel BO that are present */
struct list_head kernel_bo_present;
/** @pinned.early.evicted: pinned BO that have been evicted */
struct list_head evicted;
} early;
/** @pinned.late: late pinned lists */
struct {
/** @pinned.late.kernel_bo_present: pinned kernel BO that are present */
struct list_head kernel_bo_present;
/** @pinned.late.evicted: pinned BO that have been evicted */
struct list_head evicted;
/** @pinned.external: pinned external and dma-buf. */
struct list_head external;
} late;
} pinned;
/** @ufence_wq: user fence wait queue */

View File

@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
struct dma_buf *dma_buf = attach->dmabuf;
struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
if (!xe_bo_is_vram(bo)) {
if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);

View File

@ -49,9 +49,6 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
fw->gt = gt;
spin_lock_init(&fw->lock);
/* Assuming gen11+ so assert this assumption is correct */
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (xe->info.graphics_verx100 >= 1270) {
init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
@ -67,9 +64,6 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
{
int i, j;
/* Assuming gen11+ so assert this assumption is correct */
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (!xe_gt_is_media_type(gt))
init_domain(fw, XE_FW_DOMAIN_ID_RENDER,
FORCEWAKE_RENDER,

View File

@ -365,7 +365,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
* scratch entries, rather keep the scratch page in system memory on
* platforms where 64K pages are needed for VRAM.
*/
flags = XE_BO_FLAG_PINNED;
flags = 0;
if (ggtt->flags & XE_GGTT_FLAGS_64K)
flags |= XE_BO_FLAG_SYSTEM;
else

View File

@ -12,8 +12,10 @@
#include <generated/xe_wa_oob.h>
#include "instructions/xe_alu_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bb.h"
@ -176,15 +178,6 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
return 0;
}
/*
* Convert back from encoded value to type-safe, only to be used when reg.mcr
* is true
*/
static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
{
return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
}
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
struct xe_reg_sr *sr = &q->hwe->reg_lrc;
@ -194,6 +187,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
struct xe_bb *bb;
struct dma_fence *fence;
long timeout;
int count_rmw = 0;
int count = 0;
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
@ -206,30 +200,32 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (IS_ERR(bb))
return PTR_ERR(bb);
xa_for_each(&sr->xa, idx, entry)
++count;
/* count RMW registers as those will be handled separately */
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)
++count;
else
++count_rmw;
}
if (count || count_rmw)
xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
if (count) {
xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
/* emit single LRI with all non-RMW regs */
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
xa_for_each(&sr->xa, idx, entry) {
struct xe_reg reg = entry->reg;
struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
u32 val;
/*
* Skip reading the register if it's not really needed
*/
if (reg.masked)
val = entry->clr_bits << 16;
else if (entry->clr_bits + 1)
val = (reg.mcr ?
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
else
else if (entry->clr_bits == ~0)
val = 0;
else
continue;
val |= entry->set_bits;
@ -239,6 +235,52 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
}
}
if (count_rmw) {
/* emit MI_MATH for each RMW reg */
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)
continue;
bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
bb->cs[bb->len++] = entry->reg.addr;
bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
MI_LRI_LRM_CS_MMIO;
bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
bb->cs[bb->len++] = entry->clr_bits;
bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
bb->cs[bb->len++] = entry->set_bits;
bb->cs[bb->len++] = MI_MATH(8);
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1);
bb->cs[bb->len++] = CS_ALU_INSTR_AND;
bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2);
bb->cs[bb->len++] = CS_ALU_INSTR_OR;
bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
bb->cs[bb->len++] = entry->reg.addr;
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
entry->reg.addr, entry->clr_bits, entry->set_bits);
}
/* reset used GPR */
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO;
bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
bb->cs[bb->len++] = 0;
bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
bb->cs[bb->len++] = 0;
bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
bb->cs[bb->len++] = 0;
}
xe_lrc_emit_hwe_state_instructions(q, bb);
job = xe_bb_create_job(q, bb);
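
The ALU sequence above amounts to a read-modify-write of each non-masked register. As a hedged illustration only (helper name is made up, not driver API), the value the batch computes per entry is:

/* Illustrative helper, not part of the driver: the value the MI_MATH
 * sequence leaves in the register for one save-restore entry. */
static u32 lrc_wa_rmw_value(u32 old_val, u32 clr_bits, u32 set_bits)
{
	/* REG1 holds clr_bits; LOADINV + AND clears those bits ... */
	u32 val = old_val & ~clr_bits;

	/* ... REG2 holds set_bits; LOAD + OR applies the new bits */
	return val | set_bits;
}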

View File

@ -299,20 +299,20 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
static const struct drm_info_list debugfs_list[] = {
{"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
/*
* only for GT debugfs files which can be safely used on the VF as well:
* - without access to the GT privileged registers
* - without access to the PF specific data
*/
static const struct drm_info_list vf_safe_debugfs_list[] = {
{"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset},
{"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync},
{"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
{"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
{"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
{"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
{"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings},
{"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
{"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs},
{"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc},
{"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc},
{"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
@ -322,6 +322,15 @@ static const struct drm_info_list debugfs_list[] = {
{"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig},
};
/* everything else should be added here */
static const struct drm_info_list pf_only_debugfs_list[] = {
{"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
{"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs},
{"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
{"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
{"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
};
void xe_gt_debugfs_register(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
@ -345,10 +354,15 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
*/
root->d_inode->i_private = gt;
drm_debugfs_create_files(debugfs_list,
ARRAY_SIZE(debugfs_list),
drm_debugfs_create_files(vf_safe_debugfs_list,
ARRAY_SIZE(vf_safe_debugfs_list),
root, minor);
if (!IS_SRIOV_VF(xe))
drm_debugfs_create_files(pf_only_debugfs_list,
ARRAY_SIZE(pf_only_debugfs_list),
root, minor);
xe_uc_debugfs_register(&gt->uc, root);
if (IS_SRIOV_PF(xe))

View File

@ -345,7 +345,8 @@ fallback:
* Some older platforms don't have tables or don't have complete tables.
* Newer platforms should always have the required info.
*/
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000)
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000 &&
!gt_to_xe(gt)->info.force_execlist)
xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n");
if (gt_to_xe(gt)->info.platform == XE_PVC)

View File

@ -240,7 +240,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
atomic = access_is_atomic(pf->access_type);
if (xe_vma_is_cpu_addr_mirror(vma))
err = xe_svm_handle_pagefault(vm, vma, gt_to_tile(gt),
err = xe_svm_handle_pagefault(vm, vma, gt,
pf->page_addr, atomic);
else
err = handle_vma_pagefault(gt, vma, atomic);
@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
XE_MAX_EU_FUSE_BITS) * num_dss;
/* user can issue separate page faults per EU and per CS */
/*
* user can issue separate page faults per EU and per CS
*
* XXX: Multiplier required as compute UMDs are getting PF queue errors
* without it. Follow up on why this multiplier is required.
*/
#define PF_MULTIPLIER 8
pf_queue->num_dw =
(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
#undef PF_MULTIPLIER
pf_queue->gt = gt;
pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw,
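
As a rough, purely illustrative sizing of the queue above (the PF_MSG_LEN_DW value is assumed and the EU/engine counts are invented):

/* Example only: assuming PF_MSG_LEN_DW == 4, a GT with 512 EUs and
 * XE_NUM_HW_ENGINES == 64 would now allocate
 *   num_dw = (512 + 64) * 4 * 8 = 18432 dwords,
 * i.e. eight times the headroom of the previous sizing.
 */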

View File

@ -1444,15 +1444,23 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
return 0;
xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M);
bo = xe_bo_create_pin_map(xe, tile, NULL,
ALIGN(size, PAGE_SIZE),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_NEEDS_2M |
XE_BO_FLAG_PINNED);
bo = xe_bo_create_locked(xe, tile, NULL,
ALIGN(size, PAGE_SIZE),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_NEEDS_2M |
XE_BO_FLAG_PINNED |
XE_BO_FLAG_PINNED_LATE_RESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
err = xe_bo_pin(bo);
xe_bo_unlock(bo);
if (unlikely(err)) {
xe_bo_put(bo);
return err;
}
config->lmem_obj = bo;
if (xe_device_has_lmtt(xe)) {

View File

@ -51,26 +51,17 @@ static unsigned int extract_vfid(struct dentry *d)
* /sys/kernel/debug/dri/0/
* ├── gt0
* │   ├── pf
* │   │   ├── ggtt_available
* │   │   ├── ggtt_provisioned
* │   │   ├── contexts_provisioned
* │   │   ├── doorbells_provisioned
* │   │   ├── runtime_registers
* │   │   ├── negotiated_versions
* │   │   ├── adverse_events
* ├── gt1
* │   ├── pf
* │   │   ├── ...
*/
static const struct drm_info_list pf_info[] = {
{
"ggtt_available",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_available_ggtt,
},
{
"ggtt_provisioned",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_ggtt,
},
{
"contexts_provisioned",
.show = xe_gt_debugfs_simple_show,
@ -81,11 +72,6 @@ static const struct drm_info_list pf_info[] = {
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_dbs,
},
{
"lmem_provisioned",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_lmem,
},
{
"runtime_registers",
.show = xe_gt_debugfs_simple_show,
@ -103,6 +89,42 @@ static const struct drm_info_list pf_info[] = {
},
};
/*
* /sys/kernel/debug/dri/0/
* ├── gt0
* │   ├── pf
* │   │   ├── ggtt_available
* │   │   ├── ggtt_provisioned
*/
static const struct drm_info_list pf_ggtt_info[] = {
{
"ggtt_available",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_available_ggtt,
},
{
"ggtt_provisioned",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_ggtt,
},
};
/*
* /sys/kernel/debug/dri/0/
* ├── gt0
* │   ├── pf
* │   │   ├── lmem_provisioned
*/
static const struct drm_info_list pf_lmem_info[] = {
{
"lmem_provisioned",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_config_print_lmem,
},
};
/*
* /sys/kernel/debug/dri/0/
* ├── gt0
@ -532,6 +554,16 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
pfdentry->d_inode->i_private = gt;
drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor);
if (!xe_gt_is_media_type(gt)) {
drm_debugfs_create_files(pf_ggtt_info,
ARRAY_SIZE(pf_ggtt_info),
pfdentry, minor);
if (IS_DGFX(gt_to_xe(gt)))
drm_debugfs_create_files(pf_lmem_info,
ARRAY_SIZE(pf_lmem_info),
pfdentry, minor);
}
pf_add_policy_attrs(gt, pfdentry);
pf_add_config_attrs(gt, pfdentry, PFID);

View File

@ -112,7 +112,6 @@ static const struct xe_reg tgl_runtime_regs[] = {
XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */
XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@ -124,7 +123,6 @@ static const struct xe_reg ats_m_runtime_regs[] = {
XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@ -136,7 +134,6 @@ static const struct xe_reg pvc_runtime_regs[] = {
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
CTC_MODE, /* _MMIO(0xA26C) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@ -150,7 +147,6 @@ static const struct xe_reg ver_1270_runtime_regs[] = {
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@ -167,7 +163,6 @@ static const struct xe_reg ver_2000_runtime_regs[] = {
XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */
XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */
XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
@ -185,7 +180,6 @@ static const struct xe_reg ver_3000_runtime_regs[] = {
XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */
XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */
XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};

View File

@ -27,6 +27,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
}
static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
"svm_pagefault_count",
"tlb_inval_count",
"vma_pagefault_count",
"vma_pagefault_kb",

View File

@ -7,6 +7,7 @@
#define _XE_GT_STATS_TYPES_H_
enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
XE_GT_STATS_ID_VMA_PAGEFAULT_KB,

View File

@ -483,7 +483,8 @@ static int guc_g2g_alloc(struct xe_guc *guc)
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_ALL |
XE_BO_FLAG_GGTT_INVALIDATE);
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);

View File

@ -376,6 +376,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
&offset, &remain);
if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708))
guc_waklv_enable_simple(ads,
GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH,
&offset, &remain);
size = guc_ads_waklv_size(ads) - remain;
if (!size)
return;
@ -414,7 +419,8 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@ -490,24 +496,52 @@ static void fill_engine_enable_masks(struct xe_gt *gt,
engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
}
static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
/*
* Write the offsets corresponding to the golden LRCs. The actual data is
* populated later by guc_golden_lrc_populate()
*/
static void guc_golden_lrc_init(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
u8 guc_class;
size_t alloc_size, real_size;
u32 addr_ggtt, offset;
int class;
offset = guc_ads_golden_lrc_offset(ads);
addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
u8 guc_class;
guc_class = xe_engine_class_to_guc_class(class);
for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
if (!info_map_read(xe, &info_map,
engine_enabled_masks[guc_class]))
continue;
real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size);
/*
* This interface is slightly confusing. We need to pass the
* base address of the full golden context and the size of just
* the engine state, which is the section of the context image
* that starts after the execlists LRC registers. This is
* required to allow the GuC to restore just the engine state
* when a watchdog reset occurs.
* We calculate the engine state size by removing the size of
* what comes before it in the context image (which is identical
* on all engines).
*/
ads_blob_write(ads, ads.eng_state_size[guc_class],
guc_ads_golden_lrc_size(ads) -
xe_lrc_skip_size(xe));
real_size - xe_lrc_skip_size(xe));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
xe_bo_ggtt_addr(ads->bo) +
guc_ads_golden_lrc_offset(ads));
addr_ggtt);
addr_ggtt += alloc_size;
}
}
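
A minimal sketch of the bookkeeping guc_golden_lrc_init() now performs, assuming one page-aligned slot per enabled class (the array parameters stand in for the ADS fields and are not the real interface):

/* Sketch only: per-class golden-LRC slots laid out back to back in GGTT */
static void golden_lrc_layout_sketch(u32 base_ggtt, const size_t *lrc_size,
				     size_t skip_size, int num_classes,
				     u32 *lrca, u32 *eng_state_size)
{
	u32 addr = base_ggtt;
	int c;

	for (c = 0; c < num_classes; c++) {
		/* GuC restores only the state after the execlists registers */
		eng_state_size[c] = lrc_size[c] - skip_size;
		lrca[c] = addr;
		addr += PAGE_ALIGN(lrc_size[c]);
	}
}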
@ -682,8 +716,8 @@ static int guc_capture_prep_lists(struct xe_guc_ads *ads)
}
if (ads->capture_size != PAGE_ALIGN(total_size))
xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n",
ads->capture_size, PAGE_ALIGN(total_size));
xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n",
PAGE_ALIGN(total_size), ads->capture_size);
return PAGE_ALIGN(total_size);
}
@ -857,7 +891,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
guc_policies_init(ads);
guc_prep_golden_lrc_null(ads);
guc_golden_lrc_init(ads);
guc_mapping_table_init_invalid(gt, &info_map);
guc_doorbell_init(ads);
@ -883,7 +917,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_policies_init(ads);
fill_engine_enable_masks(gt, &info_map);
guc_mmio_reg_state_init(ads);
guc_prep_golden_lrc_null(ads);
guc_golden_lrc_init(ads);
guc_mapping_table_init(gt, &info_map);
guc_capture_prep_lists(ads);
guc_doorbell_init(ads);
@ -903,18 +937,22 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_ads_private_data_offset(ads));
}
static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
/*
* After the golden LRCs are recorded for each engine class by the first
* submission, copy them to the ADS, as initialized earlier by
* guc_golden_lrc_init().
*/
static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
size_t total_size = 0, alloc_size, real_size;
u32 addr_ggtt, offset;
u32 offset;
int class;
offset = guc_ads_golden_lrc_offset(ads);
addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
u8 guc_class;
@ -931,26 +969,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size;
/*
* This interface is slightly confusing. We need to pass the
* base address of the full golden context and the size of just
* the engine state, which is the section of the context image
* that starts after the execlists LRC registers. This is
* required to allow the GuC to restore just the engine state
* when a watchdog reset occurs.
* We calculate the engine state size by removing the size of
* what comes before it in the context image (which is identical
* on all engines).
*/
ads_blob_write(ads, ads.eng_state_size[guc_class],
real_size - xe_lrc_skip_size(xe));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
addr_ggtt);
xe_map_memcpy_to(xe, ads_to_map(ads), offset,
gt->default_lrc[class], real_size);
addr_ggtt += alloc_size;
offset += alloc_size;
}
@ -959,7 +980,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
guc_populate_golden_lrc(ads);
guc_golden_lrc_populate(ads);
}
static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)

View File

@ -238,7 +238,8 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@ -1828,10 +1829,10 @@ static void ct_dead_print(struct xe_dead_ct *dead)
return;
}
drm_printf(&lp, "CTB is dead - reason=0x%X\n", dead->reason);
/* Can't generate a genuine core dump at this point, so just do the good bits */
drm_puts(&lp, "**** Xe Device Coredump ****\n");
drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason);
xe_device_snapshot_print(xe, &lp);
drm_printf(&lp, "**** GT #%d ****\n", gt->info.id);

View File

@ -17,101 +17,119 @@
#include "xe_macros.h"
#include "xe_pm.h"
static struct xe_guc *node_to_guc(struct drm_info_node *node)
/*
* guc_debugfs_show - A show callback for struct drm_info_list
* @m: the &seq_file
* @data: data used by the drm debugfs helpers
*
* This callback can be used in struct drm_info_list to describe debugfs
* files that are &xe_guc specific, in a similar way to how we handle &xe_gt
* specific files using &xe_gt_debugfs_simple_show.
*
* It is assumed that those debugfs files will be created on a directory entry
* whose grandparent struct dentry's d_inode->i_private points to &xe_gt.
*
* /sys/kernel/debug/dri/0/
* ├── gt0                # dent->d_parent->d_parent (d_inode->i_private == gt)
* │   ├── uc             # dent->d_parent
* │   │   ├── guc_info   # dent
* │   │   ├── guc_...
*
* This function assumes that &m->private will be set to the &struct
* drm_info_node corresponding to the instance of the info on a given &struct
* drm_minor (see struct drm_info_list.show for details).
*
* This function also assumes that struct drm_info_list.data will point to the
* function code that will actually print a file content::
*
* int (*print)(struct xe_guc *, struct drm_printer *)
*
* Example::
*
* int foo(struct xe_guc *guc, struct drm_printer *p)
* {
* drm_printf(p, "enabled %d\n", guc->submission_state.enabled);
* return 0;
* }
*
* static const struct drm_info_list bar[] = {
* { .name = "foo", .show = guc_debugfs_show, .data = foo },
* };
*
* parent = debugfs_create_dir("uc", gtdir);
* drm_debugfs_create_files(bar, ARRAY_SIZE(bar), parent, minor);
*
* Return: 0 on success or a negative error code on failure.
*/
static int guc_debugfs_show(struct seq_file *m, void *data)
{
return node->info_ent->data;
}
static int guc_info(struct seq_file *m, void *data)
{
struct xe_guc *guc = node_to_guc(m->private);
struct xe_device *xe = guc_to_xe(guc);
struct drm_printer p = drm_seq_file_printer(m);
struct drm_info_node *node = m->private;
struct dentry *parent = node->dent->d_parent;
struct dentry *grandparent = parent->d_parent;
struct xe_gt *gt = grandparent->d_inode->i_private;
struct xe_device *xe = gt_to_xe(gt);
int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data;
int ret;
xe_pm_runtime_get(xe);
xe_guc_print_info(guc, &p);
ret = print(&gt->uc.guc, &p);
xe_pm_runtime_put(xe);
return ret;
}
static int guc_log(struct xe_guc *guc, struct drm_printer *p)
{
xe_guc_log_print(&guc->log, p);
return 0;
}
static int guc_log(struct seq_file *m, void *data)
static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_guc *guc = node_to_guc(m->private);
struct xe_device *xe = guc_to_xe(guc);
struct drm_printer p = drm_seq_file_printer(m);
xe_pm_runtime_get(xe);
xe_guc_log_print(&guc->log, &p);
xe_pm_runtime_put(xe);
return 0;
}
static int guc_log_dmesg(struct seq_file *m, void *data)
{
struct xe_guc *guc = node_to_guc(m->private);
struct xe_device *xe = guc_to_xe(guc);
xe_pm_runtime_get(xe);
xe_guc_log_print_dmesg(&guc->log);
xe_pm_runtime_put(xe);
return 0;
}
static int guc_ctb(struct seq_file *m, void *data)
static int guc_ctb(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_guc *guc = node_to_guc(m->private);
struct xe_device *xe = guc_to_xe(guc);
struct drm_printer p = drm_seq_file_printer(m);
xe_pm_runtime_get(xe);
xe_guc_ct_print(&guc->ct, &p, true);
xe_pm_runtime_put(xe);
xe_guc_ct_print(&guc->ct, p, true);
return 0;
}
static int guc_pc(struct seq_file *m, void *data)
static int guc_pc(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_guc *guc = node_to_guc(m->private);
struct xe_device *xe = guc_to_xe(guc);
struct drm_printer p = drm_seq_file_printer(m);
xe_pm_runtime_get(xe);
xe_guc_pc_print(&guc->pc, &p);
xe_pm_runtime_put(xe);
xe_guc_pc_print(&guc->pc, p);
return 0;
}
static const struct drm_info_list debugfs_list[] = {
{"guc_info", guc_info, 0},
{"guc_log", guc_log, 0},
{"guc_log_dmesg", guc_log_dmesg, 0},
{"guc_ctb", guc_ctb, 0},
{"guc_pc", guc_pc, 0},
/*
* only for GuC debugfs files which can be safely used on the VF as well:
* - without access to the GuC privileged registers
* - without access to the PF specific GuC objects
*/
static const struct drm_info_list vf_safe_debugfs_list[] = {
{ "guc_info", .show = guc_debugfs_show, .data = xe_guc_print_info },
{ "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb },
};
/* everything else should be added here */
static const struct drm_info_list pf_only_debugfs_list[] = {
{ "guc_log", .show = guc_debugfs_show, .data = guc_log },
{ "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg },
{ "guc_pc", .show = guc_debugfs_show, .data = guc_pc },
};
void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
{
struct drm_minor *minor = guc_to_xe(guc)->drm.primary;
struct drm_info_list *local;
int i;
#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
if (!local)
return;
memcpy(local, debugfs_list, DEBUGFS_SIZE);
#undef DEBUGFS_SIZE
for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
local[i].data = guc;
drm_debugfs_create_files(local,
ARRAY_SIZE(debugfs_list),
drm_debugfs_create_files(vf_safe_debugfs_list,
ARRAY_SIZE(vf_safe_debugfs_list),
parent, minor);
if (!IS_SRIOV_VF(guc_to_xe(guc)))
drm_debugfs_create_files(pf_only_debugfs_list,
ARRAY_SIZE(pf_only_debugfs_list),
parent, minor);
}

View File

@ -17,36 +17,61 @@
#include "xe_hw_engine.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_sriov_pf_helpers.h"
#include "xe_trace_guc.h"
#define TOTAL_QUANTA 0x8000
static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe)
static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe,
unsigned int index)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
struct engine_activity_buffer *buffer;
u16 guc_class = xe_engine_class_to_guc_class(hwe->class);
size_t offset;
offset = offsetof(struct guc_engine_activity_data,
if (engine_activity->num_functions) {
buffer = &engine_activity->function_buffer;
offset = sizeof(struct guc_engine_activity_data) * index;
} else {
buffer = &engine_activity->device_buffer;
offset = 0;
}
offset += offsetof(struct guc_engine_activity_data,
engine_activity[guc_class][hwe->logical_instance]);
return IOSYS_MAP_INIT_OFFSET(&buffer->activity_bo->vmap, offset);
}
static struct iosys_map engine_metadata_map(struct xe_guc *guc)
static struct iosys_map engine_metadata_map(struct xe_guc *guc,
unsigned int index)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
struct engine_activity_buffer *buffer;
size_t offset;
return buffer->metadata_bo->vmap;
if (engine_activity->num_functions) {
buffer = &engine_activity->function_buffer;
offset = sizeof(struct guc_engine_activity_metadata) * index;
} else {
buffer = &engine_activity->device_buffer;
offset = 0;
}
return IOSYS_MAP_INIT_OFFSET(&buffer->metadata_bo->vmap, offset);
}
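
In other words, with per-function stats enabled the shared activity and metadata BOs are carved into fixed-stride slots, one slot per function. A hedged sketch of the offset math mirrored by the two helpers above (helper names are illustrative, not driver API):

/* Illustrative only: slot offsets inside the shared function buffers */
static size_t function_activity_offset(unsigned int index)
{
	return sizeof(struct guc_engine_activity_data) * index;
}

static size_t function_metadata_offset(unsigned int index)
{
	return sizeof(struct guc_engine_activity_metadata) * index;
}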
static int allocate_engine_activity_group(struct xe_guc *guc)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct xe_device *xe = guc_to_xe(guc);
u32 num_activity_group = 1; /* Will be modified for VF */
u32 num_activity_group;
/*
* An additional activity group is allocated for PF
*/
num_activity_group = IS_SRIOV_PF(xe) ? xe_sriov_pf_get_totalvfs(xe) + 1 : 1;
engine_activity->eag = drmm_kcalloc(&xe->drm, num_activity_group,
sizeof(struct engine_activity_group), GFP_KERNEL);
@ -60,10 +85,11 @@ static int allocate_engine_activity_group(struct xe_guc *guc)
}
static int allocate_engine_activity_buffers(struct xe_guc *guc,
struct engine_activity_buffer *buffer)
struct engine_activity_buffer *buffer,
int count)
{
u32 metadata_size = sizeof(struct guc_engine_activity_metadata);
u32 size = sizeof(struct guc_engine_activity_data);
u32 metadata_size = sizeof(struct guc_engine_activity_metadata) * count;
u32 size = sizeof(struct guc_engine_activity_data) * count;
struct xe_gt *gt = guc_to_gt(guc);
struct xe_tile *tile = gt_to_tile(gt);
struct xe_bo *bo, *metadata_bo;
@ -118,10 +144,11 @@ static bool is_engine_activity_supported(struct xe_guc *guc)
return true;
}
static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe)
static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe,
unsigned int index)
{
struct xe_guc *guc = &hwe->gt->uc.guc;
struct engine_activity_group *eag = &guc->engine_activity.eag[0];
struct engine_activity_group *eag = &guc->engine_activity.eag[index];
u16 guc_class = xe_engine_class_to_guc_class(hwe->class);
return &eag->engine[guc_class][hwe->logical_instance];
@ -138,9 +165,10 @@ static u64 cpu_ns_to_guc_tsc_tick(ktime_t ns, u32 freq)
#define read_metadata_record(xe_, map_, field_) \
xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity_metadata, field_)
static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
unsigned int index)
{
struct engine_activity *ea = hw_engine_to_engine_activity(hwe);
struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index);
struct guc_engine_activity *cached_activity = &ea->activity;
struct guc_engine_activity_metadata *cached_metadata = &ea->metadata;
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
@ -151,8 +179,8 @@ static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
u64 active_ticks, gpm_ts;
u16 change_num;
activity_map = engine_activity_map(guc, hwe);
metadata_map = engine_metadata_map(guc);
activity_map = engine_activity_map(guc, hwe, index);
metadata_map = engine_metadata_map(guc, index);
global_change_num = read_metadata_record(xe, &metadata_map, global_change_num);
/* GuC has not initialized activity data yet, return 0 */
@ -194,9 +222,9 @@ update:
return ea->total + ea->active;
}
static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe, unsigned int index)
{
struct engine_activity *ea = hw_engine_to_engine_activity(hwe);
struct engine_activity *ea = hw_engine_to_engine_activity(hwe, index);
struct guc_engine_activity_metadata *cached_metadata = &ea->metadata;
struct guc_engine_activity *cached_activity = &ea->activity;
struct iosys_map activity_map, metadata_map;
@ -205,8 +233,8 @@ static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
u64 numerator;
u16 quanta_ratio;
activity_map = engine_activity_map(guc, hwe);
metadata_map = engine_metadata_map(guc);
activity_map = engine_activity_map(guc, hwe, index);
metadata_map = engine_metadata_map(guc, index);
if (!cached_metadata->guc_tsc_frequency_hz)
cached_metadata->guc_tsc_frequency_hz = read_metadata_record(xe, &metadata_map,
@ -245,10 +273,35 @@ static int enable_engine_activity_stats(struct xe_guc *guc)
return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
}
static void engine_activity_set_cpu_ts(struct xe_guc *guc)
static int enable_function_engine_activity_stats(struct xe_guc *guc, bool enable)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct engine_activity_group *eag = &engine_activity->eag[0];
u32 metadata_ggtt_addr = 0, ggtt_addr = 0, num_functions = 0;
struct engine_activity_buffer *buffer = &engine_activity->function_buffer;
u32 action[6];
int len = 0;
if (enable) {
metadata_ggtt_addr = xe_bo_ggtt_addr(buffer->metadata_bo);
ggtt_addr = xe_bo_ggtt_addr(buffer->activity_bo);
num_functions = engine_activity->num_functions;
}
action[len++] = XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER;
action[len++] = num_functions;
action[len++] = metadata_ggtt_addr;
action[len++] = 0;
action[len++] = ggtt_addr;
action[len++] = 0;
/* Blocking here to ensure the buffers are ready before reading them */
return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
}
static void engine_activity_set_cpu_ts(struct xe_guc *guc, unsigned int index)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct engine_activity_group *eag = &engine_activity->eag[index];
int i, j;
for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++)
@ -265,34 +318,106 @@ static u32 gpm_timestamp_shift(struct xe_gt *gt)
return 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
}
static bool is_function_valid(struct xe_guc *guc, unsigned int fn_id)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
if (!IS_SRIOV_PF(xe) && fn_id)
return false;
if (engine_activity->num_functions && fn_id >= engine_activity->num_functions)
return false;
return true;
}
static int engine_activity_disable_function_stats(struct xe_guc *guc)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct engine_activity_buffer *buffer = &engine_activity->function_buffer;
int ret;
if (!engine_activity->num_functions)
return 0;
ret = enable_function_engine_activity_stats(guc, false);
if (ret)
return ret;
free_engine_activity_buffers(buffer);
engine_activity->num_functions = 0;
return 0;
}
static int engine_activity_enable_function_stats(struct xe_guc *guc, int num_vfs)
{
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
struct engine_activity_buffer *buffer = &engine_activity->function_buffer;
int ret, i;
if (!num_vfs)
return 0;
/* This includes 1 PF and num_vfs */
engine_activity->num_functions = num_vfs + 1;
ret = allocate_engine_activity_buffers(guc, buffer, engine_activity->num_functions);
if (ret)
return ret;
ret = enable_function_engine_activity_stats(guc, true);
if (ret) {
free_engine_activity_buffers(buffer);
engine_activity->num_functions = 0;
return ret;
}
for (i = 0; i < engine_activity->num_functions; i++)
engine_activity_set_cpu_ts(guc, i + 1);
return 0;
}
/**
* xe_guc_engine_activity_active_ticks - Get engine active ticks
* @guc: The GuC object
* @hwe: The hw_engine object
* @fn_id: function id to report on
*
* Return: accumulated ticks @hwe was active since engine activity stats were enabled.
*/
u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
unsigned int fn_id)
{
if (!xe_guc_engine_activity_supported(guc))
return 0;
return get_engine_active_ticks(guc, hwe);
if (!is_function_valid(guc, fn_id))
return 0;
return get_engine_active_ticks(guc, hwe, fn_id);
}
/**
* xe_guc_engine_activity_total_ticks - Get engine total ticks
* @guc: The GuC object
* @hwe: The hw_engine object
* @fn_id: function id to report on
*
* Return: accumulated quanta of ticks allocated for the engine
*/
u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
unsigned int fn_id)
{
if (!xe_guc_engine_activity_supported(guc))
return 0;
return get_engine_total_ticks(guc, hwe);
if (!is_function_valid(guc, fn_id))
return 0;
return get_engine_total_ticks(guc, hwe, fn_id);
}
/**
@ -310,6 +435,25 @@ bool xe_guc_engine_activity_supported(struct xe_guc *guc)
return engine_activity->supported;
}
/**
* xe_guc_engine_activity_function_stats - Enable/Disable per-function engine activity stats
* @guc: The GuC object
* @num_vfs: number of vfs
* @enable: true to enable, false otherwise
*
* Return: 0 on success, negative error code otherwise
*/
int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable)
{
if (!xe_guc_engine_activity_supported(guc))
return 0;
if (enable)
return engine_activity_enable_function_stats(guc, num_vfs);
return engine_activity_disable_function_stats(guc);
}
/**
* xe_guc_engine_activity_enable_stats - Enable engine activity stats
* @guc: The GuC object
@ -327,7 +471,7 @@ void xe_guc_engine_activity_enable_stats(struct xe_guc *guc)
if (ret)
xe_gt_err(guc_to_gt(guc), "failed to enable activity stats %d\n", ret);
else
engine_activity_set_cpu_ts(guc);
engine_activity_set_cpu_ts(guc, 0);
}
static void engine_activity_fini(void *arg)
@ -360,7 +504,7 @@ int xe_guc_engine_activity_init(struct xe_guc *guc)
return ret;
}
ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer);
ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer, 1);
if (ret) {
xe_gt_err(gt, "failed to allocate engine activity buffers (%pe)\n", ERR_PTR(ret));
return ret;

View File

@ -14,6 +14,9 @@ struct xe_guc;
int xe_guc_engine_activity_init(struct xe_guc *guc);
bool xe_guc_engine_activity_supported(struct xe_guc *guc);
void xe_guc_engine_activity_enable_stats(struct xe_guc *guc);
u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
int xe_guc_engine_activity_function_stats(struct xe_guc *guc, int num_vfs, bool enable);
u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
unsigned int fn_id);
u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe,
unsigned int fn_id);
#endif

View File

@ -79,14 +79,24 @@ struct xe_guc_engine_activity {
/** @num_activity_group: number of activity groups */
u32 num_activity_group;
/** @num_functions: number of functions */
u32 num_functions;
/** @supported: indicates support for engine activity stats */
bool supported;
/** @eag: holds the device level engine activity data */
/**
* @eag: holds the device-level engine activity data in native mode.
* In SRIOV mode, points to an array whose entries hold the engine
* activity data for the PF and VFs
*/
struct engine_activity_group *eag;
/** @device_buffer: buffer object for global engine activity */
struct engine_activity_buffer device_buffer;
/** @function_buffer: buffer object for per-function engine activity */
struct engine_activity_buffer function_buffer;
};
#endif

View File

@ -260,7 +260,8 @@ int xe_guc_log_init(struct xe_guc_log *log)
bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);

View File

@ -461,6 +461,21 @@ static u32 get_cur_freq(struct xe_gt *gt)
return decode_freq(freq);
}
/**
* xe_guc_pc_get_cur_freq_fw - With fw held, get requested frequency
* @pc: The GuC PC
*
* Returns: the requested frequency for that GT instance
*/
u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
return get_cur_freq(gt);
}
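
A plausible call pattern for the new _fw variant (hedged sketch; the actual callers are not part of this diff and the forcewake helper names are assumed):

/* Sketch: read the requested frequency while already holding GT forcewake */
unsigned int fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);

if (fw_ref) {
	u32 freq = xe_guc_pc_get_cur_freq_fw(&gt->uc.guc.pc);

	/* ... use freq alongside other forcewake-protected reads ... */
	xe_force_wake_put(gt_to_fw(gt), fw_ref);
}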
/**
* xe_guc_pc_get_cur_freq - Get Current requested frequency
* @pc: The GuC PC
@ -1170,7 +1185,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
bo = xe_managed_bo_create_pin_map(xe, tile, size,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);

View File

@ -22,6 +22,7 @@ void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p);
u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);

View File

@ -300,6 +300,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
primelockdep(guc);
guc->submission_state.initialized = true;
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}
@ -834,6 +836,13 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
/*
* If device is being wedged even before submission_state is
* initialized, there's nothing to do here.
*/
if (!guc->submission_state.initialized)
return;
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {

View File

@ -89,6 +89,11 @@ struct xe_guc {
struct mutex lock;
/** @submission_state.enabled: submission is enabled */
bool enabled;
/**
* @submission_state.initialized: marks whether the submission state has
* been initialized - before that, not even the lock is valid
*/
bool initialized;
/** @submission_state.fini_wq: submit fini wait queue */
wait_queue_head_t fini_wq;
} submission_state;

View File

@ -19,29 +19,6 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end)
return (end - start) >> PAGE_SHIFT;
}
/**
* xe_mark_range_accessed() - mark a range is accessed, so core mm
* have such information for memory eviction or write back to
* hard disk
* @range: the range to mark
* @write: if write to this range, we mark pages in this range
* as dirty
*/
static void xe_mark_range_accessed(struct hmm_range *range, bool write)
{
struct page *page;
u64 i, npages;
npages = xe_npages_in_range(range->start, range->end);
for (i = 0; i < npages; i++) {
page = hmm_pfn_to_page(range->hmm_pfns[i]);
if (write)
set_page_dirty_lock(page);
mark_page_accessed(page);
}
}
static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
@ -331,7 +308,6 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
if (ret)
goto out_unlock;
xe_mark_range_accessed(&hmm_range, write);
userptr->sg = &userptr->sgt;
xe_hmm_userptr_set_mapped(uvma);
userptr->notifier_seq = hmm_range.notifier_seq;

View File

@ -605,6 +605,7 @@ err_object:
kobject_put(kobj);
return err;
}
ALLOW_ERROR_INJECTION(xe_add_hw_engine_class_defaults, ERRNO); /* See xe_pci_probe() */
static void hw_engine_class_sysfs_fini(void *arg)

View File

@ -5,6 +5,7 @@
#include <linux/hwmon-sysfs.h>
#include <linux/hwmon.h>
#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/units.h>
@ -27,6 +28,7 @@ enum xe_hwmon_reg {
REG_PKG_POWER_SKU_UNIT,
REG_GT_PERF_STATUS,
REG_PKG_ENERGY_STATUS,
REG_FAN_SPEED,
};
enum xe_hwmon_reg_operation {
@ -42,6 +44,13 @@ enum xe_hwmon_channel {
CHANNEL_MAX,
};
enum xe_fan_channel {
FAN_1,
FAN_2,
FAN_3,
FAN_MAX,
};
/*
* SF_* - scale factors for particular quantities according to hwmon spec.
*/
@ -61,6 +70,16 @@ struct xe_hwmon_energy_info {
long accum_energy;
};
/**
* struct xe_hwmon_fan_info - to cache previous fan reading
*/
struct xe_hwmon_fan_info {
/** @reg_val_prev: previous fan reg val */
u32 reg_val_prev;
/** @time_prev: previous timestamp */
u64 time_prev;
};
/**
* struct xe_hwmon - xe hwmon data structure
*/
@ -79,6 +98,8 @@ struct xe_hwmon {
int scl_shift_time;
/** @ei: Energy info for energyN_input */
struct xe_hwmon_energy_info ei[CHANNEL_MAX];
/** @fi: Fan info for fanN_input */
struct xe_hwmon_fan_info fi[FAN_MAX];
};
static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
@ -144,6 +165,14 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
return PCU_CR_PACKAGE_ENERGY_STATUS;
}
break;
case REG_FAN_SPEED:
if (channel == FAN_1)
return BMG_FAN_1_SPEED;
else if (channel == FAN_2)
return BMG_FAN_2_SPEED;
else if (channel == FAN_3)
return BMG_FAN_3_SPEED;
break;
default:
drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
break;
@ -454,6 +483,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = {
HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT),
NULL
};
@ -480,6 +510,19 @@ static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval)
(uval & POWER_SETUP_I1_DATA_MASK));
}
static int xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval)
{
struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
/* Platforms that don't return a correct value */
if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) {
*uval = 2;
return 0;
}
return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL);
}
static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel,
long *value, u32 scale_factor)
{
@ -705,6 +748,75 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
}
}
static umode_t
xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
u32 uval;
if (!hwmon->xe->info.has_fan_control)
return 0;
switch (attr) {
case hwmon_fan_input:
if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval))
return 0;
return channel < uval ? 0444 : 0;
default:
return 0;
}
}
static int
xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_hwmon_fan_info *fi = &hwmon->fi[channel];
u64 rotations, time_now, time;
u32 reg_val;
int ret = 0;
mutex_lock(&hwmon->hwmon_lock);
reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel));
time_now = get_jiffies_64();
/*
* The HW register value is the accumulated count of pulses from the PWM fan,
* with a scale of 2 pulses per rotation.
*/
rotations = (reg_val - fi->reg_val_prev) / 2;
time = jiffies_delta_to_msecs(time_now - fi->time_prev);
if (unlikely(!time)) {
ret = -EAGAIN;
goto unlock;
}
/*
* Calculate fan speed in RPM by time-averaging two subsequent readings:
* RPM = number of rotations * msecs per minute / time delta in msecs
*/
*val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time);
fi->reg_val_prev = reg_val;
fi->time_prev = time_now;
unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
}
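
A quick worked example of the formula above, with invented numbers:

/* Worked example (invented numbers): a pulse-count delta of 140 over 1000 ms
 * is 140 / 2 = 70 rotations, so *val = 70 * 60000 / 1000 = 4200 RPM.
 */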
static int
xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
{
switch (attr) {
case hwmon_fan_input:
return xe_hwmon_fan_input_read(hwmon, channel, val);
default:
return -EOPNOTSUPP;
}
}
static umode_t
xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
u32 attr, int channel)
@ -730,6 +842,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
case hwmon_energy:
ret = xe_hwmon_energy_is_visible(hwmon, attr, channel);
break;
case hwmon_fan:
ret = xe_hwmon_fan_is_visible(hwmon, attr, channel);
break;
default:
ret = 0;
break;
@ -765,6 +880,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
case hwmon_energy:
ret = xe_hwmon_energy_read(hwmon, attr, channel, val);
break;
case hwmon_fan:
ret = xe_hwmon_fan_read(hwmon, attr, channel, val);
break;
default:
ret = -EOPNOTSUPP;
break;
@ -842,7 +960,7 @@ static void
xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
long energy;
long energy, fan_speed;
u64 val_sku_unit = 0;
int channel;
struct xe_reg pkg_power_sku_unit;
@ -866,6 +984,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
for (channel = 0; channel < CHANNEL_MAX; channel++)
if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel))
xe_hwmon_energy_get(hwmon, channel, &energy);
/* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. */
for (channel = 0; channel < FAN_MAX; channel++)
if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel))
xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);
}
static void xe_hwmon_mutex_destroy(void *arg)

View File

@ -71,7 +71,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
lmtt->ops->lmtt_pte_num(level)),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED);
XE_BO_FLAG_NEEDS_64K);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;

View File

@ -37,6 +37,7 @@
#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
#define LRC_PPHWSP_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
static struct xe_device *
@ -50,19 +51,22 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
struct xe_device *xe = gt_to_xe(gt);
size_t size;
/* Per-process HW status page (PPHWSP) */
size = LRC_PPHWSP_SIZE;
/* Engine context image */
switch (class) {
case XE_ENGINE_CLASS_RENDER:
if (GRAPHICS_VER(xe) >= 20)
size = 4 * SZ_4K;
size += 3 * SZ_4K;
else
size = 14 * SZ_4K;
size += 13 * SZ_4K;
break;
case XE_ENGINE_CLASS_COMPUTE:
/* 14 pages since graphics_ver == 11 */
if (GRAPHICS_VER(xe) >= 20)
size = 3 * SZ_4K;
size += 2 * SZ_4K;
else
size = 14 * SZ_4K;
size += 13 * SZ_4K;
break;
default:
WARN(1, "Unknown engine class: %d", class);
@ -71,7 +75,7 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
case XE_ENGINE_CLASS_VIDEO_DECODE:
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
case XE_ENGINE_CLASS_OTHER:
size = 2 * SZ_4K;
size += 1 * SZ_4K;
}
/* Add indirect ring state page */
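
For illustration, with the PPHWSP counted explicitly the totals are unchanged; e.g. for an Xe2+ render engine (values taken from the switch above, indirect ring state assumed supported):

/* 4 KiB PPHWSP + 3 * 4 KiB engine context = 16 KiB,
 * plus the 4 KiB indirect ring state page = 20 KiB total,
 * exactly matching the previous "4 * SZ_4K" accounting.
 */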
@ -650,7 +654,6 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
@ -893,6 +896,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
u32 bo_flags;
int err;
kref_init(&lrc->refcount);
@ -901,15 +905,18 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE;
if (vm && vm->xef) /* userspace */
bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
@ -1445,6 +1452,7 @@ static int dump_gfxpipe_command(struct drm_printer *p,
MATCH3D(3DSTATE_CLIP_MESH);
MATCH3D(3DSTATE_SBE_MESH);
MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
MATCH3D(3DSTATE_COARSE_PIXEL);
MATCH3D(3DSTATE_DRAWING_RECTANGLE);
MATCH3D(3DSTATE_CHROMA_KEY);

View File

@ -86,7 +86,7 @@ static const char *guc_name(struct xe_guc *guc)
* This object needs to be 4KiB aligned.
*
* - _`Interrupt Source Report Page`: this is the equivalent of the
* GEN11_GT_INTR_DWx registers, with each bit in those registers being
* GT_INTR_DWx registers, with each bit in those registers being
* mapped to a byte here. The offsets are the same, just bytes instead
* of bits. This object needs to be cacheline aligned.
*

View File

@ -97,7 +97,7 @@ struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile)
return tile->migrate->q;
}
static void xe_migrate_fini(struct drm_device *dev, void *arg)
static void xe_migrate_fini(void *arg)
{
struct xe_migrate *m = arg;
@ -209,7 +209,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
num_entries * XE_PAGE_SIZE,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_PINNED |
XE_BO_FLAG_PAGETABLE);
if (IS_ERR(bo))
return PTR_ERR(bo);
@ -401,7 +400,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
struct xe_vm *vm;
int err;
m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL);
if (!m)
return ERR_PTR(-ENOMEM);
@ -455,7 +454,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
might_lock(&m->job_mutex);
fs_reclaim_release(GFP_KERNEL);
err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m);
if (err)
return ERR_PTR(err);
@ -779,10 +778,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
bool dst_is_pltt = dst->mem_type == XE_PL_TT;
bool src_is_vram = mem_type_is_vram(src->mem_type);
bool dst_is_vram = mem_type_is_vram(dst->mem_type);
bool type_device = src_bo->ttm.type == ttm_bo_type_device;
bool needs_ccs_emit = type_device && xe_migrate_needs_ccs_emit(xe);
bool copy_ccs = xe_device_has_flat_ccs(xe) &&
xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
bool use_comp_pat = xe_device_has_flat_ccs(xe) &&
bool use_comp_pat = type_device && xe_device_has_flat_ccs(xe) &&
GRAPHICS_VER(xe) >= 20 && src_is_vram && !dst_is_vram;
/* Copying CCS between two different BOs is not supported yet. */
@ -839,6 +840,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
avail_pts, avail_pts);
if (copy_system_ccs) {
xe_assert(xe, type_device);
ccs_size = xe_device_ccs_bytes(xe, src_L0);
batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size,
&ccs_ofs, &ccs_pt, 0,
@ -849,7 +851,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
/* Add copy commands size here */
batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
((xe_migrate_needs_ccs_emit(xe) ? EMIT_COPY_CCS_DW : 0));
((needs_ccs_emit ? EMIT_COPY_CCS_DW : 0));
bb = xe_bb_new(gt, batch_size, usm);
if (IS_ERR(bb)) {
@ -878,7 +880,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (!copy_only_ccs)
emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
if (xe_migrate_needs_ccs_emit(xe))
if (needs_ccs_emit)
flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
IS_DGFX(xe) ? src_is_vram : src_is_pltt,
dst_L0_ofs,
@ -1544,6 +1546,7 @@ void xe_migrate_wait(struct xe_migrate *m)
dma_fence_wait(m->fence, false);
}
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static u32 pte_update_cmd_size(u64 size)
{
u32 num_dword;
@ -1608,6 +1611,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
{
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
bool use_usm_batch = xe->info.has_usm;
struct dma_fence *fence = NULL;
u32 batch_size = 2;
u64 src_L0_ofs, dst_L0_ofs;
@ -1624,7 +1628,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
batch_size += pte_update_cmd_size(round_update_size);
batch_size += EMIT_COPY_DW;
bb = xe_bb_new(gt, batch_size, true);
bb = xe_bb_new(gt, batch_size, use_usm_batch);
if (IS_ERR(bb)) {
err = PTR_ERR(bb);
return ERR_PTR(err);
@ -1649,7 +1653,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
XE_PAGE_SIZE);
job = xe_bb_create_migration_job(m->q, bb,
xe_migrate_batch_base(m, true),
xe_migrate_batch_base(m, use_usm_batch),
update_idx);
if (IS_ERR(job)) {
err = PTR_ERR(job);
@ -1719,6 +1723,8 @@ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
XE_MIGRATE_COPY_TO_SRAM);
}
#endif
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif

View File

@ -138,6 +138,7 @@ int xe_mmio_probe_early(struct xe_device *xe)
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
ALLOW_ERROR_INJECTION(xe_mmio_probe_early, ERRNO); /* See xe_pci_probe() */
/**
* xe_mmio_init() - Initialize an MMIO instance
@ -204,8 +205,9 @@ void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val)
trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));
if (!reg.vf && mmio->sriov_vf_gt)
xe_gt_sriov_vf_write32(mmio->sriov_vf_gt, reg, val);
if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
xe_gt_sriov_vf_write32(mmio->sriov_vf_gt ?:
mmio->tile->primary_gt, reg, val);
else
writel(val, mmio->regs + addr);
}
@ -218,8 +220,9 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
/* Wa_15015404425 */
mmio_flush_pending_writes(mmio);
if (!reg.vf && mmio->sriov_vf_gt)
val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt, reg);
if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
val = xe_gt_sriov_vf_read32(mmio->sriov_vf_gt ?:
mmio->tile->primary_gt, reg);
else
val = readl(mmio->regs + addr);
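
The routing change above can be summarised by a tiny helper (illustrative only, not driver API): on a VF, registers without VF access go through the SR-IOV shadow of whichever GT is available.

/* Sketch: GT used for VF-mediated register access after this change */
static struct xe_gt *vf_access_gt(struct xe_mmio *mmio)
{
	return mmio->sriov_vf_gt ?: mmio->tile->primary_gt;
}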

View File

@ -11,6 +11,7 @@
#include <drm/drm_module.h>
#include "xe_drv.h"
#include "xe_configfs.h"
#include "xe_hw_fence.h"
#include "xe_pci.h"
#include "xe_pm.h"
@ -38,8 +39,8 @@ MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
module_param_named(probe_display, xe_modparam.probe_display, bool, 0444);
MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)");
module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600);
MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600);
MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size");
module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600);
MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
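
A hedged sketch of how the new signed parameter could be consumed (function names are invented; the real resize path is not part of this hunk):

/* Sketch only: interpreting xe_modparam.force_vram_bar_size */
if (xe_modparam.force_vram_bar_size < 0)
	return;					/* leave the BAR size untouched */
else if (xe_modparam.force_vram_bar_size == 0)
	resize_vram_bar_to_max_needed();	/* default behaviour */
else
	resize_vram_bar_to_mib(xe_modparam.force_vram_bar_size);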
@ -88,6 +89,10 @@ static const struct init_funcs init_funcs[] = {
{
.init = xe_check_nomodeset,
},
{
.init = xe_configfs_init,
.exit = xe_configfs_exit,
},
{
.init = xe_hw_fence_module_init,
.exit = xe_hw_fence_module_exit,

View File

@ -2221,6 +2221,7 @@ addr_err:
kfree(oa_regs);
return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_oa_alloc_regs, ERRNO);
static ssize_t show_dynamic_id(struct kobject *kobj,
struct kobj_attribute *attr,

View File

@ -62,11 +62,13 @@ struct xe_device_desc {
u8 is_dgfx:1;
u8 has_display:1;
u8 has_fan_control:1;
u8 has_heci_gscfi:1;
u8 has_heci_cscfi:1;
u8 has_llc:1;
u8 has_pxp:1;
u8 has_sriov:1;
u8 needs_scratch:1;
u8 skip_guc_pc:1;
u8 skip_mtcfg:1;
u8 skip_pcode:1;
@ -302,6 +304,7 @@ static const struct xe_device_desc dg2_desc = {
DG2_FEATURES,
.has_display = true,
.has_fan_control = true,
};
static const __maybe_unused struct xe_device_desc pvc_desc = {
@ -329,6 +332,7 @@ static const struct xe_device_desc lnl_desc = {
.dma_mask_size = 46,
.has_display = true,
.has_pxp = true,
.needs_scratch = true,
};
static const struct xe_device_desc bmg_desc = {
@ -336,7 +340,9 @@ static const struct xe_device_desc bmg_desc = {
PLATFORM(BATTLEMAGE),
.dma_mask_size = 46,
.has_display = true,
.has_fan_control = true,
.has_heci_cscfi = 1,
.needs_scratch = true,
};
static const struct xe_device_desc ptl_desc = {
@ -345,6 +351,7 @@ static const struct xe_device_desc ptl_desc = {
.has_display = true,
.has_sriov = true,
.require_force_probe = true,
.needs_scratch = true,
};
#undef PLATFORM
@ -575,6 +582,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.dma_mask_size = desc->dma_mask_size;
xe->info.is_dgfx = desc->is_dgfx;
xe->info.has_fan_control = desc->has_fan_control;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
xe->info.has_llc = desc->has_llc;
@ -583,6 +591,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.skip_guc_pc = desc->skip_guc_pc;
xe->info.skip_mtcfg = desc->skip_mtcfg;
xe->info.skip_pcode = desc->skip_pcode;
xe->info.needs_scratch = desc->needs_scratch;
xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
xe_modparam.probe_display &&
@ -803,18 +812,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return err;
err = xe_device_probe_early(xe);
if (err) {
/*
* In Boot Survivability mode, no drm card is exposed and driver
* is loaded with bare minimum to allow for firmware to be
* flashed through mei. If early probe failed, but it managed to
* enable survivability mode, return success.
*/
if (xe_survivability_mode_is_enabled(xe))
return 0;
/*
* In Boot Survivability mode, no drm card is exposed and driver
* is loaded with bare minimum to allow for firmware to be
 * flashed through mei. Return success if survivability mode
 * is enabled due to pcode failure or configfs being set.
*/
if (xe_survivability_mode_is_enabled(xe))
return 0;
if (err)
return err;
}
err = xe_info_init(xe, desc);
if (err)
@ -920,6 +928,7 @@ static int xe_pci_suspend(struct device *dev)
pci_save_state(pdev);
pci_disable_device(pdev);
pci_set_power_state(pdev, PCI_D3cold);
return 0;
}

View File

@ -7,6 +7,7 @@
#include "xe_device.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_guc_engine_activity.h"
#include "xe_pci_sriov.h"
#include "xe_pm.h"
#include "xe_sriov.h"
@ -111,6 +112,20 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs)
}
}
static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, bool enable)
{
struct xe_gt *gt;
unsigned int id;
int ret = 0;
for_each_gt(gt, xe, id) {
ret = xe_guc_engine_activity_function_stats(&gt->uc.guc, num_vfs, enable);
if (ret)
xe_sriov_info(xe, "Failed to %s engine activity function stats (%pe)\n",
str_enable_disable(enable), ERR_PTR(ret));
}
}
static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@ -145,6 +160,9 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
num_vfs, total_vfs, str_plural(total_vfs));
pf_engine_activity_stats(xe, num_vfs, true);
return num_vfs;
failed:
@ -168,6 +186,8 @@ static int pf_disable_vfs(struct xe_device *xe)
if (!num_vfs)
return 0;
pf_engine_activity_stats(xe, num_vfs, false);
pci_disable_sriov(pdev);
pf_reset_vfs(xe, num_vfs);

View File

@ -7,6 +7,7 @@
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/error-injection.h>
#include <drm/drm_managed.h>
@ -323,3 +324,4 @@ int xe_pcode_probe_early(struct xe_device *xe)
{
return xe_pcode_ready(xe, false);
}
ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */

View File

@ -49,6 +49,9 @@
/* Domain IDs (param2) */
#define PCODE_MBOX_DOMAIN_HBM 0x2
#define FAN_SPEED_CONTROL 0x7D
#define FSC_READ_NUM_FANS 0x4
#define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4))
/* PCODE_SCRATCH0 */
#define AUXINFO_REG_OFFSET REG_GENMASK(17, 15)

View File

@ -188,7 +188,7 @@ int xe_pm_resume(struct xe_device *xe)
* This only restores pinned memory which is the memory required for the
* GT(s) to resume.
*/
err = xe_bo_restore_kernel(xe);
err = xe_bo_restore_early(xe);
if (err)
goto err;
@ -199,7 +199,7 @@ int xe_pm_resume(struct xe_device *xe)
xe_display_pm_resume(xe);
err = xe_bo_restore_user(xe);
err = xe_bo_restore_late(xe);
if (err)
goto err;
@ -484,7 +484,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
* This only restores pinned memory which is the memory
* required for the GT(s) to resume.
*/
err = xe_bo_restore_kernel(xe);
err = xe_bo_restore_early(xe);
if (err)
goto out;
}
@ -497,7 +497,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_display_pm_runtime_resume(xe);
if (xe->d3cold.allowed) {
err = xe_bo_restore_user(xe);
err = xe_bo_restore_late(xe);
if (err)
goto out;
}

View File

@ -10,9 +10,11 @@
#include "xe_force_wake.h"
#include "xe_gt_idle.h"
#include "xe_guc_engine_activity.h"
#include "xe_guc_pc.h"
#include "xe_hw_engine.h"
#include "xe_pm.h"
#include "xe_pmu.h"
#include "xe_sriov_pf_helpers.h"
/**
* DOC: Xe PMU (Performance Monitoring Unit)
@ -32,9 +34,10 @@
* gt[60:63] Selects gt for the event
* engine_class[20:27] Selects engine-class for event
* engine_instance[12:19] Selects the engine-instance for the event
* function[44:59] Selects the function of the event (SRIOV enabled)
*
* For engine specific events (engine-*), gt, engine_class and engine_instance parameters must be
* set as populated by DRM_XE_DEVICE_QUERY_ENGINES.
* set as populated by DRM_XE_DEVICE_QUERY_ENGINES and function if SRIOV is enabled.
*
* For gt specific events (gt-*) gt parameter must be passed. All other parameters will be 0.
*
@ -49,6 +52,7 @@
*/
#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60)
#define XE_PMU_EVENT_FUNCTION_MASK GENMASK_ULL(59, 44)
#define XE_PMU_EVENT_ENGINE_CLASS_MASK GENMASK_ULL(27, 20)
#define XE_PMU_EVENT_ENGINE_INSTANCE_MASK GENMASK_ULL(19, 12)
#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0)
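As an illustrative aside (not part of this patch), a userspace tool could compose the 64-bit perf config value by packing the fields exactly as documented above; the shift positions mirror the XE_PMU_EVENT_*_MASK definitions. The helper name and example values below are hypothetical.

#include <stdint.h>

/* Hypothetical sketch: pack the PMU config fields documented above:
 * gt[60:63], function[44:59], engine_class[20:27],
 * engine_instance[12:19] and the event id in bits [0:11]. */
static uint64_t xe_pmu_config(uint64_t gt, uint64_t function,
			      uint64_t engine_class,
			      uint64_t engine_instance,
			      uint64_t event_id)
{
	return (gt << 60) | (function << 44) | (engine_class << 20) |
	       (engine_instance << 12) | event_id;
}

/* e.g. xe_pmu_config(0, 0, 0, 0, 0x02) selects engine-active-ticks
 * (XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) on gt 0, PF, class 0, instance 0. */

The same fields are exposed through the PMU format attributes further below (config:60-63 for gt, config:44-59 for function, and so on), so perf tooling can also name them symbolically instead of packing the bits by hand.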
@ -58,6 +62,11 @@ static unsigned int config_to_event_id(u64 config)
return FIELD_GET(XE_PMU_EVENT_ID_MASK, config);
}
static unsigned int config_to_function_id(u64 config)
{
return FIELD_GET(XE_PMU_EVENT_FUNCTION_MASK, config);
}
static unsigned int config_to_engine_class(u64 config)
{
return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config);
@ -76,6 +85,8 @@ static unsigned int config_to_gt_id(u64 config)
#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01
#define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS 0x02
#define XE_PMU_EVENT_ENGINE_TOTAL_TICKS 0x03
#define XE_PMU_EVENT_GT_ACTUAL_FREQUENCY 0x04
#define XE_PMU_EVENT_GT_REQUESTED_FREQUENCY 0x05
static struct xe_gt *event_to_gt(struct perf_event *event)
{
@ -111,6 +122,14 @@ static bool is_engine_event(u64 config)
event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
}
static bool is_gt_frequency_event(struct perf_event *event)
{
u32 id = config_to_event_id(event->attr.config);
return id == XE_PMU_EVENT_GT_ACTUAL_FREQUENCY ||
id == XE_PMU_EVENT_GT_REQUESTED_FREQUENCY;
}
static bool event_gt_forcewake(struct perf_event *event)
{
struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
@ -118,7 +137,7 @@ static bool event_gt_forcewake(struct perf_event *event)
struct xe_gt *gt;
unsigned int *fw_ref;
if (!is_engine_event(config))
if (!is_engine_event(config) && !is_gt_frequency_event(event))
return true;
gt = xe_device_get_gt(xe, config_to_gt_id(config));
@ -151,7 +170,7 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
static bool event_param_valid(struct perf_event *event)
{
struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
unsigned int engine_class, engine_instance;
unsigned int engine_class, engine_instance, function_id;
u64 config = event->attr.config;
struct xe_gt *gt;
@ -161,16 +180,28 @@ static bool event_param_valid(struct perf_event *event)
engine_class = config_to_engine_class(config);
engine_instance = config_to_engine_instance(config);
function_id = config_to_function_id(config);
switch (config_to_event_id(config)) {
case XE_PMU_EVENT_GT_C6_RESIDENCY:
if (engine_class || engine_instance)
case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY:
case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY:
if (engine_class || engine_instance || function_id)
return false;
break;
case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
if (!event_to_hwe(event))
return false;
/* PF (function 0) and up to totalvfs are valid when SRIOV is enabled */
if (IS_SRIOV_PF(xe)) {
if (function_id > xe_sriov_pf_get_totalvfs(xe))
return false;
} else if (function_id) {
return false;
}
break;
}
@ -242,13 +273,17 @@ static int xe_pmu_event_init(struct perf_event *event)
static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event)
{
struct xe_hw_engine *hwe;
u64 val = 0;
unsigned int function_id;
u64 config, val = 0;
config = event->attr.config;
function_id = config_to_function_id(config);
hwe = event_to_hwe(event);
if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe);
if (config_to_event_id(config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe, function_id);
else
val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe);
val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe, function_id);
return val;
}
@ -266,6 +301,10 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
return read_engine_events(gt, event);
case XE_PMU_EVENT_GT_ACTUAL_FREQUENCY:
return xe_guc_pc_get_act_freq(&gt->uc.guc.pc);
case XE_PMU_EVENT_GT_REQUESTED_FREQUENCY:
return xe_guc_pc_get_cur_freq_fw(&gt->uc.guc.pc);
}
return 0;
@ -281,7 +320,14 @@ static void xe_pmu_event_update(struct perf_event *event)
new = __xe_pmu_event_read(event);
} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));
local64_add(new - prev, &event->count);
/*
* GT frequency is not a monotonically increasing counter, so add the
* instantaneous value instead.
*/
if (is_gt_frequency_event(event))
local64_add(new, &event->count);
else
local64_add(new - prev, &event->count);
}
static void xe_pmu_event_read(struct perf_event *event)
@ -351,6 +397,7 @@ static void xe_pmu_event_del(struct perf_event *event, int flags)
}
PMU_FORMAT_ATTR(gt, "config:60-63");
PMU_FORMAT_ATTR(function, "config:44-59");
PMU_FORMAT_ATTR(engine_class, "config:20-27");
PMU_FORMAT_ATTR(engine_instance, "config:12-19");
PMU_FORMAT_ATTR(event, "config:0-11");
@ -359,6 +406,7 @@ static struct attribute *pmu_format_attrs[] = {
&format_attr_event.attr,
&format_attr_engine_class.attr,
&format_attr_engine_instance.attr,
&format_attr_function.attr,
&format_attr_gt.attr,
NULL,
};
@ -419,6 +467,10 @@ static ssize_t event_attr_show(struct device *dev,
XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms");
XE_EVENT_ATTR_NOUNIT(engine-active-ticks, engine_active_ticks, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
XE_EVENT_ATTR_NOUNIT(engine-total-ticks, engine_total_ticks, XE_PMU_EVENT_ENGINE_TOTAL_TICKS);
XE_EVENT_ATTR_SIMPLE(gt-actual-frequency, gt_actual_frequency,
XE_PMU_EVENT_GT_ACTUAL_FREQUENCY, "MHz");
XE_EVENT_ATTR_SIMPLE(gt-requested-frequency, gt_requested_frequency,
XE_PMU_EVENT_GT_REQUESTED_FREQUENCY, "MHz");
static struct attribute *pmu_empty_event_attrs[] = {
/* Empty - all events are added as groups with .attr_update() */
@ -434,6 +486,8 @@ static const struct attribute_group *pmu_events_attr_update[] = {
&pmu_group_gt_c6_residency,
&pmu_group_engine_active_ticks,
&pmu_group_engine_total_ticks,
&pmu_group_gt_actual_frequency,
&pmu_group_gt_requested_frequency,
NULL,
};
@ -442,8 +496,11 @@ static void set_supported_events(struct xe_pmu *pmu)
struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
struct xe_gt *gt = xe_device_get_gt(xe, 0);
if (!xe->info.skip_guc_pc)
if (!xe->info.skip_guc_pc) {
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY);
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_ACTUAL_FREQUENCY);
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY);
}
if (xe_guc_engine_activity_supported(&gt->uc.guc)) {
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);

View File

@ -103,6 +103,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
{
struct xe_pt *pt;
struct xe_bo *bo;
u32 bo_flags;
int err;
if (level) {
@ -115,14 +116,16 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
if (!pt)
return ERR_PTR(-ENOMEM);
bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE;
if (vm->xef) /* userspace */
bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
pt->level = level;
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
XE_BO_FLAG_PINNED |
XE_BO_FLAG_NO_RESV_EVICT |
XE_BO_FLAG_PAGETABLE);
bo_flags);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto err_kfree;
@ -269,8 +272,11 @@ struct xe_pt_update {
bool preexisting;
};
/**
* struct xe_pt_stage_bind_walk - Walk state for the stage_bind walk.
*/
struct xe_pt_stage_bind_walk {
/** base: The base class. */
/** @base: The base class. */
struct xe_pt_walk base;
/* Input parameters for the walk */
@ -278,15 +284,19 @@ struct xe_pt_stage_bind_walk {
struct xe_vm *vm;
/** @tile: The tile we're building for. */
struct xe_tile *tile;
/** @default_pte: PTE flag only template. No address is associated */
u64 default_pte;
/** @default_vram_pte: PTE flag only template for VRAM. No address is associated */
u64 default_vram_pte;
/** @default_system_pte: PTE flag only template for System. No address is associated */
u64 default_system_pte;
/** @dma_offset: DMA offset to add to the PTE. */
u64 dma_offset;
/**
* @needs_64k: This address range enforces 64K alignment and
* granularity.
* @needs_64K: This address range enforces 64K alignment and
* granularity on VRAM.
*/
bool needs_64K;
/** @clear_pt: clear page table entries during the bind walk */
bool clear_pt;
/**
* @vma: VMA being mapped
*/
@ -299,6 +309,7 @@ struct xe_pt_stage_bind_walk {
u64 va_curs_start;
/* Output */
/** @wupd: Walk output data for page-table updates. */
struct xe_walk_update {
/** @wupd.entries: Caller provided storage. */
struct xe_vm_pgtable_update *entries;
@ -316,7 +327,7 @@ struct xe_pt_stage_bind_walk {
u64 l0_end_addr;
/** @addr_64K: The start address of the current 64K chunk. */
u64 addr_64K;
/** @found_64: Whether @add_64K actually points to a 64K chunk. */
/** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
bool found_64K;
};
@ -436,6 +447,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
if (xe_vma_is_null(xe_walk->vma))
return true;
/* If we are clearing the page table, there are no DMA addresses */
if (xe_walk->clear_pt)
return true;
/* Is the DMA address huge PTE size aligned? */
size = next - addr;
dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
@ -515,24 +530,35 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
struct xe_res_cursor *curs = xe_walk->curs;
bool is_null = xe_vma_is_null(xe_walk->vma);
bool is_vram = is_null ? false : xe_res_is_vram(curs);
XE_WARN_ON(xe_walk->va_curs_start != addr);
pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
xe_res_dma(curs) + xe_walk->dma_offset,
xe_walk->vma, pat_index, level);
pte |= xe_walk->default_pte;
if (xe_walk->clear_pt) {
pte = 0;
} else {
pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
xe_res_dma(curs) +
xe_walk->dma_offset,
xe_walk->vma,
pat_index, level);
if (!is_null)
pte |= is_vram ? xe_walk->default_vram_pte :
xe_walk->default_system_pte;
/*
* Set the XE_PTE_PS64 hint if possible, otherwise if
* this device *requires* 64K PTE size for VRAM, fail.
*/
if (level == 0 && !xe_parent->is_compact) {
if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
pte |= XE_PTE_PS64;
} else if (XE_WARN_ON(xe_walk->needs_64K)) {
return -EINVAL;
/*
* Set the XE_PTE_PS64 hint if possible, otherwise if
* this device *requires* 64K PTE size for VRAM, fail.
*/
if (level == 0 && !xe_parent->is_compact) {
if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
xe_walk->vma->gpuva.flags |=
XE_VMA_PTE_64K;
pte |= XE_PTE_PS64;
} else if (XE_WARN_ON(xe_walk->needs_64K &&
is_vram)) {
return -EINVAL;
}
}
}
@ -540,7 +566,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (unlikely(ret))
return ret;
if (!is_null)
if (!is_null && !xe_walk->clear_pt)
xe_res_next(curs, next - addr);
xe_walk->va_curs_start = next;
xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
@ -603,6 +629,44 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
.pt_entry = xe_pt_stage_bind_entry,
};
/*
* Default atomic expectations for different allocation scenarios are as follows:
*
* 1. Traditional API: When the VM is not in LR mode:
* - Device atomics are expected to function with all allocations.
*
* 2. Compute/SVM API: When the VM is in LR mode:
* - Device atomics are the default behavior when the bo is placed in a single region.
 * - In all other cases device atomics will be disabled with AE=0 until an application
 * requests differently using an ioctl like madvise.
*/
static bool xe_atomic_for_vram(struct xe_vm *vm)
{
return true;
}
static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo)
{
struct xe_device *xe = vm->xe;
if (!xe->info.has_device_atomics_on_smem)
return false;
/*
 * If a SMEM+LMEM allocation is backed by SMEM, a device
 * atomic will cause a gpu page fault and the allocation
 * will then be migrated to LMEM; bind such allocations with
 * device atomics enabled.
*
* TODO: Revisit this. Perhaps add something like a
* fault_on_atomics_in_system UAPI flag.
* Note that this also prohibits GPU atomics in LR mode for
* userptr and system memory on DGFX.
*/
return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) ||
(bo && xe_bo_has_single_placement(bo))));
}
/**
* xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
* range.
@ -612,6 +676,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
* @entries: Storage for the update entries used for connecting the tree to
* the main tree at commit time.
* @num_entries: On output contains the number of @entries used.
* @clear_pt: Clear the page table entries.
*
* This function builds a disconnected page-table tree for a given address
* range. The tree is connected to the main vm tree for the gpu using
@ -625,13 +690,13 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
static int
xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_svm_range *range,
struct xe_vm_pgtable_update *entries, u32 *num_entries)
struct xe_vm_pgtable_update *entries,
u32 *num_entries, bool clear_pt)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_bo *bo = xe_vma_bo(vma);
bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
struct xe_res_cursor curs;
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_pt_stage_bind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_bind_ops,
@ -639,34 +704,31 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
.max_level = XE_PT_HIGHEST_LEVEL,
.staging = true,
},
.vm = xe_vma_vm(vma),
.vm = vm,
.tile = tile,
.curs = &curs,
.va_curs_start = range ? range->base.itree.start :
xe_vma_start(vma),
.vma = vma,
.wupd.entries = entries,
.clear_pt = clear_pt,
};
struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
struct xe_pt *pt = vm->pt_root[tile->id];
int ret;
if (range) {
/* Move this entire thing to xe_svm.c? */
xe_svm_notifier_lock(xe_vma_vm(vma));
xe_svm_notifier_lock(vm);
if (!xe_svm_range_pages_valid(range)) {
xe_svm_range_debug(range, "BIND PREPARE - RETRY");
xe_svm_notifier_unlock(xe_vma_vm(vma));
xe_svm_notifier_unlock(vm);
return -EAGAIN;
}
if (xe_svm_range_has_dma_mapping(range)) {
xe_res_first_dma(range->base.dma_addr, 0,
range->base.itree.last + 1 - range->base.itree.start,
&curs);
is_devmem = xe_res_is_vram(&curs);
if (is_devmem)
xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM");
else
xe_svm_range_debug(range, "BIND PREPARE - DMA");
xe_svm_range_debug(range, "BIND PREPARE - MIXED");
} else {
xe_assert(xe, false);
}
@ -674,54 +736,21 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
* Note, when unlocking the resource cursor dma addresses may become
* stale, but the bind will be aborted anyway at commit time.
*/
xe_svm_notifier_unlock(xe_vma_vm(vma));
xe_svm_notifier_unlock(vm);
}
xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem;
xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K);
if (clear_pt)
goto walk_pt;
/**
* Default atomic expectations for different allocation scenarios are as follows:
*
* 1. Traditional API: When the VM is not in LR mode:
* - Device atomics are expected to function with all allocations.
*
* 2. Compute/SVM API: When the VM is in LR mode:
* - Device atomics are the default behavior when the bo is placed in a single region.
* - In all other cases device atomics will be disabled with AE=0 until an application
* request differently using a ioctl like madvise.
*/
if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
if (bo && xe_bo_has_single_placement(bo))
xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
/**
* If a SMEM+LMEM allocation is backed by SMEM, a device
* atomics will cause a gpu page fault and which then
* gets migrated to LMEM, bind such allocations with
* device atomics enabled.
*/
else if (is_devmem)
xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
} else {
xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
}
/**
* Unset AE if the platform(PVC) doesn't support it on an
* allocation
*/
if (!xe->info.has_device_atomics_on_smem && !is_devmem)
xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
XE_USM_PPGTT_PTE_AE : 0;
}
if (is_devmem) {
xe_walk.default_pte |= XE_PPGTT_PTE_DM;
xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
}
if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
if (!range)
xe_bo_assert_held(bo);
@ -739,6 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
curs.size = xe_vma_size(vma);
}
walk_pt:
ret = xe_pt_walk_range(&pt->base, pt->level,
range ? range->base.itree.start : xe_vma_start(vma),
range ? range->base.itree.last + 1 : xe_vma_end(vma),
@ -1103,12 +1133,14 @@ static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
static int
xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
struct xe_svm_range *range,
struct xe_vm_pgtable_update *entries, u32 *num_entries)
struct xe_vm_pgtable_update *entries,
u32 *num_entries, bool invalidate_on_bind)
{
int err;
*num_entries = 0;
err = xe_pt_stage_bind(tile, vma, range, entries, num_entries);
err = xe_pt_stage_bind(tile, vma, range, entries, num_entries,
invalidate_on_bind);
if (!err)
xe_tile_assert(tile, *num_entries);
@ -1420,6 +1452,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
return err;
}
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
{
struct xe_vm *vm = pt_update->vops->vm;
@ -1453,6 +1486,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
return 0;
}
#endif
struct invalidation_fence {
struct xe_gt_tlb_invalidation_fence base;
@ -1791,7 +1825,7 @@ static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma)
struct xe_vma *vma, bool invalidate_on_bind)
{
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
@ -1813,7 +1847,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
return err;
err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries,
&pt_op->num_entries);
&pt_op->num_entries, invalidate_on_bind);
if (!err) {
xe_tile_assert(tile, pt_op->num_entries <=
ARRAY_SIZE(pt_op->entries));
@ -1835,11 +1869,11 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
* If !rebind, and scratch enabled VMs, there is a chance the scratch
* PTE is already cached in the TLB so it needs to be invalidated.
* On !LR VMs this is done in the ring ops preceding a batch, but on
* non-faulting LR, in particular on user-space batch buffer chaining,
* it needs to be done here.
* LR, in particular on user-space batch buffer chaining, it needs to
* be done here.
*/
if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
xe_vm_in_preempt_fence_mode(vm)))
xe_vm_in_lr_mode(vm)))
pt_update_ops->needs_invalidation = true;
else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
/* We bump also if batch_invalidate_tlb is true */
@ -1875,7 +1909,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
pt_op->rebind = BIT(tile->id) & range->tile_present;
err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries,
&pt_op->num_entries);
&pt_op->num_entries, false);
if (!err) {
xe_tile_assert(tile, pt_op->num_entries <=
ARRAY_SIZE(pt_op->entries));
@ -1987,11 +2021,13 @@ static int op_prepare(struct xe_vm *vm,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
if ((!op->map.immediate && xe_vm_in_fault_mode(vm) &&
!op->map.invalidate_on_bind) ||
op->map.is_cpu_addr_mirror)
break;
err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma,
op->map.invalidate_on_bind);
pt_update_ops->wait_vm_kernel = true;
break;
case DRM_GPUVA_OP_REMAP:
@ -2005,12 +2041,12 @@ static int op_prepare(struct xe_vm *vm,
if (!err && op->remap.prev) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.prev);
op->remap.prev, false);
pt_update_ops->wait_vm_bookkeep = true;
}
if (!err && op->remap.next) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.next);
op->remap.next, false);
pt_update_ops->wait_vm_bookkeep = true;
}
break;
@ -2032,7 +2068,7 @@ static int op_prepare(struct xe_vm *vm,
if (xe_vma_is_cpu_addr_mirror(vma))
break;
err = bind_op_prepare(vm, tile, pt_update_ops, vma);
err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
pt_update_ops->wait_vm_kernel = true;
break;
}
@ -2115,7 +2151,7 @@ ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma, struct dma_fence *fence,
struct dma_fence *fence2)
struct dma_fence *fence2, bool invalidate_on_bind)
{
xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
@ -2132,6 +2168,8 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
}
vma->tile_present |= BIT(tile->id);
vma->tile_staged &= ~BIT(tile->id);
if (invalidate_on_bind)
vma->tile_invalidated |= BIT(tile->id);
if (xe_vma_is_userptr(vma)) {
lockdep_assert_held_read(&vm->userptr.notifier_lock);
to_userptr_vma(vma)->userptr.initial_bind = true;
@ -2193,7 +2231,7 @@ static void op_commit(struct xe_vm *vm,
break;
bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
fence2);
fence2, op->map.invalidate_on_bind);
break;
case DRM_GPUVA_OP_REMAP:
{
@ -2206,10 +2244,10 @@ static void op_commit(struct xe_vm *vm,
if (op->remap.prev)
bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
fence, fence2);
fence, fence2, false);
if (op->remap.next)
bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
fence, fence2);
fence, fence2, false);
break;
}
case DRM_GPUVA_OP_UNMAP:
@ -2227,7 +2265,7 @@ static void op_commit(struct xe_vm *vm,
if (!xe_vma_is_cpu_addr_mirror(vma))
bind_op_commit(vm, tile, pt_update_ops, vma, fence,
fence2);
fence2, false);
break;
}
case DRM_GPUVA_OP_DRIVER:
@ -2257,11 +2295,15 @@ static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
.pre_commit = xe_pt_userptr_pre_commit,
};
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
.populate = xe_vm_populate_pgtable,
.clear = xe_migrate_clear_pgtable_callback,
.pre_commit = xe_pt_svm_pre_commit,
};
#else
static const struct xe_migrate_pt_update_ops svm_migrate_ops;
#endif
/**
* xe_pt_update_ops_run() - Run PT update operations

View File

@ -340,7 +340,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_GPUSVM))
if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM))
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR;
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=

View File

@ -173,6 +173,9 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
if (xa_empty(&sr->xa))
return;
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);

View File

@ -8,7 +8,7 @@
struct xe_sched_job;
#define MAX_JOB_SIZE_DW 48
#define MAX_JOB_SIZE_DW 58
#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
/**

View File

@ -258,9 +258,6 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
rtp_get_context(ctx, &hwe, &gt, &xe);
if (IS_SRIOV_VF(xe))
return;
xe_assert(xe, entries);
for (entry = entries; entry - entries < n_entries; entry++) {

View File

@ -60,7 +60,8 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
bo = xe_managed_bo_create_pin_map(xe, tile, size,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo)) {
drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
size / SZ_1K, bo);

View File

@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/sysfs.h>
#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_heci_gsc.h"
@ -28,20 +29,32 @@
* This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware
* to be flashed through mei and collect telemetry. The driver's probe flow is modified
* such that it enters survivability mode when pcode initialization is incomplete and boot status
* denotes a failure. The driver then populates the survivability_mode PCI sysfs indicating
* survivability mode and provides additional information required for debug
* denotes a failure.
*
* KMD exposes below admin-only readable sysfs in survivability mode
* Survivability mode can also be entered manually using the survivability mode attribute available
 * through configfs, which is beneficial in several use cases. It can be used to address scenarios
* where pcode does not detect failure or for validation purposes. It can also be used in
* In-Field-Repair (IFR) to repair a single card without impacting the other cards in a node.
*
* device/survivability_mode: The presence of this file indicates that the card is in survivability
* mode. Also, provides additional information on why the driver entered
* survivability mode.
 * Use the below command to enable survivability mode manually::
*
* Capability Information - Provides boot status
* Postcode Information - Provides information about the failure
* Overflow Information - Provides history of previous failures
* Auxiliary Information - Certain failures may have information in
* addition to postcode information
* # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
*
 * Refer to :ref:`xe_configfs` for more details on how to use configfs
*
* Survivability mode is indicated by the below admin-only readable sysfs which provides additional
* debug information::
*
 * /sys/bus/pci/devices/<device>/survivability_mode
*
* Capability Information:
* Provides boot status
* Postcode Information:
* Provides information about the failure
 * Overflow Information:
 * Provides history of previous failures
 * Auxiliary Information:
 * Certain failures may have information in addition to postcode information
*/
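As a hedged illustration (not part of the patch), an admin tool could detect survivability mode simply by reading the sysfs attribute described above; the BDF below reuses the example device from the configfs command and is hypothetical.

#include <stdio.h>

/* Minimal sketch: print the survivability mode debug information for an
 * example device. The attribute is only present while the card is in
 * survivability mode, so a failed open means normal operation. */
int main(void)
{
	FILE *f = fopen("/sys/bus/pci/devices/0000:03:00.0/survivability_mode", "r");
	char line[256];

	if (!f)
		return 0;	/* not in survivability mode */

	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* capability/postcode/overflow/aux info */

	fclose(f);
	return 1;
}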
static u32 aux_history_offset(u32 reg_value)
@ -133,6 +146,7 @@ static void xe_survivability_mode_fini(void *arg)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
xe_configfs_clear_survivability_mode(pdev);
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
}
@ -186,24 +200,41 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe)
return xe->survivability.mode;
}
/*
* survivability_mode_requested - check if it's possible to enable
* survivability mode and that was requested by firmware
/**
* xe_survivability_mode_is_requested - check if it's possible to enable survivability
* mode that was requested by firmware or userspace
* @xe: xe device instance
*
* This function reads the boot status from Pcode.
* This function reads configfs and boot status from Pcode.
*
* Return: true if platform support is available and boot status indicates
* failure, false otherwise.
* failure or if survivability mode is requested, false otherwise.
*/
static bool survivability_mode_requested(struct xe_device *xe)
bool xe_survivability_mode_is_requested(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u32 data;
bool survivability_mode;
if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe))
if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
return false;
survivability_mode = xe_configfs_get_survivability_mode(pdev);
if (xe->info.platform < XE_BATTLEMAGE) {
if (survivability_mode) {
dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
xe_configfs_clear_survivability_mode(pdev);
}
return false;
}
/* Enable survivability mode if set via configfs */
if (survivability_mode)
return true;
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
@ -226,7 +257,7 @@ int xe_survivability_mode_enable(struct xe_device *xe)
struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
if (!survivability_mode_requested(xe))
if (!xe_survivability_mode_is_requested(xe))
return 0;
survivability->size = MAX_SCRATCH_MMIO;

View File

@ -12,5 +12,6 @@ struct xe_device;
int xe_survivability_mode_enable(struct xe_device *xe);
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
bool xe_survivability_mode_is_requested(struct xe_device *xe);
#endif /* _XE_SURVIVABILITY_MODE_H_ */

View File

@ -4,6 +4,7 @@
*/
#include "xe_bo.h"
#include "xe_gt_stats.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
@ -339,6 +340,8 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
up_write(&vm->lock);
}
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct xe_vram_region *page_to_vr(struct page *page)
{
return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
@ -577,6 +580,8 @@ static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
.copy_to_ram = xe_svm_copy_to_ram,
};
#endif
static const struct drm_gpusvm_ops gpusvm_ops = {
.range_alloc = xe_svm_range_alloc,
.range_free = xe_svm_range_free,
@ -650,6 +655,7 @@ static bool xe_svm_range_is_valid(struct xe_svm_range *range,
return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
}
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
return &tile->mem.vram;
@ -711,12 +717,21 @@ unlock:
return err;
}
#else
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
struct xe_svm_range *range,
const struct drm_gpusvm_ctx *ctx)
{
return -EOPNOTSUPP;
}
#endif
/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
* @vma: The CPU address mirror VMA.
 * @tile: The tile upon which the fault occurred.
 * @gt: The gt upon which the fault occurred.
* @fault_addr: The GPU fault address.
* @atomic: The fault atomic access bit.
*
@ -726,7 +741,7 @@ unlock:
* Return: 0 on success, negative error code on error.
*/
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct xe_tile *tile, u64 fault_addr,
struct xe_gt *gt, u64 fault_addr,
bool atomic)
{
struct drm_gpusvm_ctx ctx = {
@ -740,12 +755,15 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct drm_gpusvm_range *r;
struct drm_exec exec;
struct dma_fence *fence;
struct xe_tile *tile = gt_to_tile(gt);
ktime_t end = 0;
int err;
lockdep_assert_held_write(&vm->lock);
xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
retry:
/* Always process UNMAPs first so the view of SVM ranges is current */
err = xe_svm_garbage_collector(vm);
@ -866,6 +884,7 @@ int xe_svm_bo_evict(struct xe_bo *bo)
}
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
struct device *dev,

View File

@ -6,16 +6,19 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
#include <drm/drm_pagemap.h>
#include <drm/drm_gpusvm.h>
#define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
struct xe_bo;
struct xe_vram_region;
struct xe_gt;
struct xe_tile;
struct xe_vm;
struct xe_vma;
struct xe_vram_region;
/** struct xe_svm_range - SVM range */
struct xe_svm_range {
@ -43,7 +46,6 @@ struct xe_svm_range {
u8 skip_migrate :1;
};
#if IS_ENABLED(CONFIG_DRM_GPUSVM)
/**
* xe_svm_range_pages_valid() - SVM range pages valid
* @range: SVM range
@ -64,7 +66,7 @@ void xe_svm_fini(struct xe_vm *vm);
void xe_svm_close(struct xe_vm *vm);
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct xe_tile *tile, u64 fault_addr,
struct xe_gt *gt, u64 fault_addr,
bool atomic);
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end);
@ -72,7 +74,50 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end);
int xe_svm_bo_evict(struct xe_bo *bo);
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
/**
* xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
* @range: SVM range
*
* Return: True if SVM range has a DMA mapping, False otherwise
*/
static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range)
{
lockdep_assert_held(&range->base.gpusvm->notifier_lock);
return range->base.flags.has_dma_mapping;
}
#define xe_svm_assert_in_notifier(vm__) \
lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
#define xe_svm_notifier_lock(vm__) \
drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
#define xe_svm_notifier_unlock(vm__) \
drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
#else
#include <linux/interval_tree.h>
struct drm_pagemap_device_addr;
struct xe_bo;
struct xe_gt;
struct xe_vm;
struct xe_vma;
struct xe_tile;
struct xe_vram_region;
#define XE_INTERCONNECT_VRAM 1
struct xe_svm_range {
struct {
struct interval_tree_node itree;
const struct drm_pagemap_device_addr *dma_addr;
} base;
u32 tile_present;
u32 tile_invalidated;
};
static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range)
{
return false;
@ -102,7 +147,7 @@ void xe_svm_close(struct xe_vm *vm)
static inline
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct xe_tile *tile, u64 fault_addr,
struct xe_gt *gt, u64 fault_addr,
bool atomic)
{
return 0;
@ -124,27 +169,16 @@ static inline
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
}
#endif
/**
* xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
* @range: SVM range
*
* Return: True if SVM range has a DMA mapping, False otherwise
*/
static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range)
#define xe_svm_assert_in_notifier(...) do {} while (0)
#define xe_svm_range_has_dma_mapping(...) false
static inline void xe_svm_notifier_lock(struct xe_vm *vm)
{
lockdep_assert_held(&range->base.gpusvm->notifier_lock);
return range->base.flags.has_dma_mapping;
}
#define xe_svm_assert_in_notifier(vm__) \
lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
#define xe_svm_notifier_lock(vm__) \
drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
#define xe_svm_notifier_unlock(vm__) \
drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
{
}
#endif
#endif

View File

@ -92,6 +92,8 @@
struct uc_fw_entry {
enum xe_platform platform;
enum xe_gt_type gt_type;
struct {
const char *path;
u16 major;
@ -106,32 +108,37 @@ struct fw_blobs_by_type {
u32 count;
};
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \
fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \
fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \
fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \
fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \
fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \
fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \
fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \
fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \
fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2))
/*
* Add an "ANY" define just to convey the meaning it's given here.
*/
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \
fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \
fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \
fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \
fw_def(ALDERLAKE_S, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(ROCKETLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1))
#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \
fw_def(LUNARLAKE, no_ver(xe, huc, lnl)) \
fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \
fw_def(DG1, no_ver(i915, huc, dg1)) \
fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \
fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \
fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \
fw_def(TIGERLAKE, no_ver(i915, huc, tgl))
fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \
fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \
fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \
fw_def(DG1, GT_TYPE_ANY, no_ver(i915, huc, dg1)) \
fw_def(ALDERLAKE_P, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \
fw_def(ALDERLAKE_S, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \
fw_def(ROCKETLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl)) \
fw_def(TIGERLAKE, GT_TYPE_ANY, no_ver(i915, huc, tgl))
/* for the GSC FW we match the compatibility version and not the release one */
#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \
fw_def(LUNARLAKE, major_ver(xe, gsc, lnl, 104, 1, 0)) \
fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 102, 1, 0))
fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, gsc, lnl, 104, 1, 0)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, gsc, mtl, 102, 1, 0))
#define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \
__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
@ -159,12 +166,13 @@ struct fw_blobs_by_type {
a, b, c }
/* All blobs need to be declared via MODULE_FIRMWARE() */
#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename) \
#define XE_UC_MODULE_FIRMWARE(platform__, gt_type__, fw_filename) \
MODULE_FIRMWARE(fw_filename);
#define XE_UC_FW_ENTRY(platform__, entry__) \
#define XE_UC_FW_ENTRY(platform__, gt_type__, entry__) \
{ \
.platform = XE_ ## platform__, \
.gt_type = XE_ ## gt_type__, \
entry__, \
},
@ -222,30 +230,38 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
[XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) },
[XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) },
};
static const struct uc_fw_entry *entries;
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
enum xe_platform p = xe->info.platform;
const struct uc_fw_entry *entries;
u32 count;
int i;
xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all));
xe_gt_assert(gt, uc_fw->type < ARRAY_SIZE(blobs_all));
xe_gt_assert(gt, gt->info.type != XE_GT_TYPE_UNINITIALIZED);
entries = blobs_all[uc_fw->type].entries;
count = blobs_all[uc_fw->type].count;
for (i = 0; i < count && p <= entries[i].platform; i++) {
if (p == entries[i].platform) {
uc_fw->path = entries[i].path;
uc_fw->versions.wanted.major = entries[i].major;
uc_fw->versions.wanted.minor = entries[i].minor;
uc_fw->versions.wanted.patch = entries[i].patch;
uc_fw->full_ver_required = entries[i].full_ver_required;
if (p != entries[i].platform)
continue;
if (uc_fw->type == XE_UC_FW_TYPE_GSC)
uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
else
uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
if (entries[i].gt_type != XE_GT_TYPE_ANY &&
entries[i].gt_type != gt->info.type)
continue;
break;
}
uc_fw->path = entries[i].path;
uc_fw->versions.wanted.major = entries[i].major;
uc_fw->versions.wanted.minor = entries[i].minor;
uc_fw->versions.wanted.patch = entries[i].patch;
uc_fw->full_ver_required = entries[i].full_ver_required;
if (uc_fw->type == XE_UC_FW_TYPE_GSC)
uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
else
uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
break;
}
}

View File

@ -2049,7 +2049,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
!xe->info.needs_scratch))
return -EINVAL;
if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
@ -2201,6 +2202,20 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
}
#endif
static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
{
if (!xe_vm_in_fault_mode(vm))
return false;
if (!xe_vm_has_scratch(vm))
return false;
if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
return false;
return true;
}
/*
* Create operations list from IOCTL arguments, setup operations fields so parse
* and commit steps are decoupled from IOCTL arguments. This step can fail.
@ -2273,6 +2288,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
op->map.pat_index = pat_index;
op->map.invalidate_on_bind =
__xe_vm_needs_clear_scratch_pages(vm, flags);
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
op->prefetch.region = prefetch_region;
}
@ -2472,8 +2489,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
return PTR_ERR(vma);
op->map.vma = vma;
if ((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
!op->map.is_cpu_addr_mirror)
if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
!op->map.is_cpu_addr_mirror) ||
op->map.invalidate_on_bind)
xe_vma_ops_incr_pt_update_ops(vops,
op->tile_mask);
break;
@ -2726,9 +2744,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
err = vma_lock_and_validate(exec, op->map.vma,
!xe_vm_in_fault_mode(vm) ||
op->map.immediate);
if (!op->map.invalidate_on_bind)
err = vma_lock_and_validate(exec, op->map.vma,
!xe_vm_in_fault_mode(vm) ||
op->map.immediate);
break;
case DRM_GPUVA_OP_REMAP:
err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
@ -3109,7 +3128,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
(!xe_vm_in_fault_mode(vm) ||
!IS_ENABLED(CONFIG_DRM_GPUSVM)))) {
!IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
err = -EINVAL;
goto free_bind_ops;
}
@ -3243,7 +3262,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
XE_64K_PAGE_MASK) ||
XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
return -EINVAL;
return -EINVAL;
}
}
@ -3251,7 +3270,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
if (bo->cpu_caching) {
if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
return -EINVAL;
return -EINVAL;
}
} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
/*
@ -3260,7 +3279,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
 * how it was mapped on the CPU. Just assume it is
* potentially cached on CPU side.
*/
return -EINVAL;
return -EINVAL;
}
/* If a BO is protected it can only be mapped if the key is still valid */

View File

@ -330,6 +330,8 @@ struct xe_vma_op_map {
bool is_cpu_addr_mirror;
/** @dumpable: whether BO is dumped on GPU hang */
bool dumpable;
/** @invalidate_on_bind: invalidate the VMA before bind */
bool invalidate_on_bind;
/** @pat_index: The pat index to use for this operation. */
u16 pat_index;
};

View File

@ -49,7 +49,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
*/
static void resize_vram_bar(struct xe_device *xe)
{
u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
int force_vram_bar_size = xe_modparam.force_vram_bar_size;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct pci_bus *root = pdev->bus;
resource_size_t current_size;
@ -66,6 +66,9 @@ static void resize_vram_bar(struct xe_device *xe)
if (!bar_size_mask)
return;
if (force_vram_bar_size < 0)
return;
/* set to a specific size? */
if (force_vram_bar_size) {
u32 bar_size_bit;

View File

@ -230,6 +230,18 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
/* Xe2_HPG */
{ XE_RTP_NAME("16025250150"),
XE_RTP_RULES(GRAPHICS_VERSION(2001)),
XE_RTP_ACTIONS(SET(LSN_VC_REG2,
LSN_LNI_WGT(1) |
LSN_LNE_WGT(1) |
LSN_DIM_X_WGT(1) |
LSN_DIM_Y_WGT(1) |
LSN_DIM_Z_WGT(1)))
},
/* Xe2_HPM */
{ XE_RTP_NAME("16021867713"),

View File

@ -57,3 +57,5 @@ no_media_l3 MEDIA_VERSION(3000)
GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
16023105232 GRAPHICS_VERSION_RANGE(2001, 3001)
MEDIA_VERSION_RANGE(1301, 3000)
16026508708 GRAPHICS_VERSION_RANGE(1200, 3001)
MEDIA_VERSION_RANGE(1300, 3000)

View File

@ -917,7 +917,11 @@ struct drm_xe_gem_mmap_offset {
* struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE
*
* The @flags can be:
* - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE
 * - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE - Map the whole virtual address
 * space of the VM to a scratch page. A vm_bind would overwrite the scratch
 * page mapping. This flag is mutually exclusive with the
 * %DRM_XE_VM_CREATE_FLAG_FAULT_MODE flag, except on Xe2 and Xe3
 * platforms.
* - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts
* exec submissions to its exec_queues that don't have an upper time
* limit on the job execution time. But exec submissions to these