2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00
linux/drivers/gpu/drm/msm/msm_mmu.h
Connor Abbott b13044092c drm/msm: Temporarily disable stall-on-fault after a page fault
When things go wrong, the GPU is capable of quickly generating millions
of faulting translation requests per second. When that happens, in the
stall-on-fault model each access will stall until it wins the race to
signal the fault and then the RESUME register is written. This slows
processing page faults to a crawl as the GPU can generate faults much
faster than the CPU can acknowledge them. It also means that all
available resources in the SMMU are saturated waiting for the stalled
transactions, so that other transactions such as transactions generated
by the GMU, which shares translation resources with the GPU, cannot
proceed. This causes a GMU watchdog timeout, which leads to a failed
reset because GX cannot collapse when there is a transaction pending and
a permanently hung GPU.

On older platforms with qcom,smmu-v2, it seems that when one transaction
is stalled subsequent faulting transactions are terminated, which avoids
this problem, but the MMU-500 follows the spec here.

To work around these problems, disable stall-on-fault as soon as we get a
page fault until a cooldown period after pagefaults stop. This allows
the GMU some guaranteed time to continue working. We only use
stall-on-fault to halt the GPU while we collect a devcoredump and we
always terminate the transaction afterward, so it's fine to miss some
subsequent page faults. We also keep it disabled so long as the current
devcoredump hasn't been deleted, because in that case we likely won't
capture another one if there's a fault.

After this commit HFI messages still occasionally time out, because the
crashdump handler doesn't run fast enough to let the GMU resume, but the
driver seems to recover from it. This will probably go away after the
HFI timeout is increased.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/654891/
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
2025-06-09 11:37:34 -07:00

62 lines
1.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2013 Red Hat
* Author: Rob Clark <robdclark@gmail.com>
*/
#ifndef __MSM_MMU_H__
#define __MSM_MMU_H__
#include <linux/iommu.h>
struct msm_mmu_funcs {
void (*detach)(struct msm_mmu *mmu);
int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt,
size_t len, int prot);
int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len);
void (*destroy)(struct msm_mmu *mmu);
void (*set_stall)(struct msm_mmu *mmu, bool enable);
};
enum msm_mmu_type {
MSM_MMU_GPUMMU,
MSM_MMU_IOMMU,
MSM_MMU_IOMMU_PAGETABLE,
};
struct msm_mmu {
const struct msm_mmu_funcs *funcs;
struct device *dev;
int (*handler)(void *arg, unsigned long iova, int flags, void *data);
void *arg;
enum msm_mmu_type type;
};
static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
const struct msm_mmu_funcs *funcs, enum msm_mmu_type type)
{
mmu->dev = dev;
mmu->funcs = funcs;
mmu->type = type;
}
struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks);
struct msm_mmu *msm_iommu_gpu_new(struct device *dev, struct msm_gpu *gpu, unsigned long quirks);
struct msm_mmu *msm_iommu_disp_new(struct device *dev, unsigned long quirks);
static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,
int (*handler)(void *arg, unsigned long iova, int flags, void *data))
{
mmu->arg = arg;
mmu->handler = handler;
}
struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);
int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,
int *asid);
int msm_iommu_pagetable_walk(struct msm_mmu *mmu, unsigned long iova, uint64_t ptes[4]);
struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu);
#endif /* __MSM_MMU_H__ */