To disable l2/l3 swizzling in A8x, set the respective bits in both
GRAS_NC_MODE_CNTL and RB_CCU_NC_MODE_CNTL registers. This is required
for Glymur where it is recommended to keep l2/l3 swizzling disabled.
Fixes: 288a932008 ("drm/msm/adreno: Introduce A8x GPU Support")
Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
Message-ID: <20260305-a8xx-ubwc-fix-v1-1-d99b6da4c5a9@oss.qualcomm.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
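
For reference, a minimal sketch of the register fields involved (a hypothetical helper, not part of the patch; the bit positions mirror a8xx_set_ubwc_config() in the file below, which also packs the highest-bank-bit and other UBWC fields into the same registers):

static void a8xx_disable_l2_l3_swizzle_sketch(struct msm_gpu *gpu)
{
	u32 l2_dis = 1, l3_dis = 1;	/* illustrative: force both levels off */

	/* GRAS_NC_MODE_CNTL: l3 disable is bit 4, l2 disable is bit 3 (BR and BV pipes) */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL,
			l3_dis << 4 | l2_dis << 3);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL,
			l3_dis << 4 | l2_dis << 3);

	/* RB_CCU_NC_MODE_CNTL: l3 disable is bit 5, l2 disable is bit 4 (BR pipe only) */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
			l3_dis << 5 | l2_dis << 4);

	a8xx_aperture_clear(gpu);
}
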
1216 lines
36 KiB
C
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */

#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13
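
/*
 * Many CP registers on A8x exist per pipe (and per slice). Which copy a host
 * register access hits is selected by the aperture programmed in
 * CP_APERTURE_CNTL_HOST. The helpers below program that aperture under
 * aperture_lock and cache the last written value to skip redundant writes.
 */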
static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;

	val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);

	if (a6xx_gpu->cached_aperture == val)
		return;

	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);

	a6xx_gpu->cached_aperture = val;
}

static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);

	a8xx_aperture_slice_set(gpu, pipe, 0);
}

static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
}

static void a8xx_aperture_clear(struct msm_gpu *gpu)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
	a8xx_aperture_release(gpu, flags);
}

static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, pipe, &flags);
	gpu_write(gpu, offset, data);
	a8xx_aperture_release(gpu, flags);
}

static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);
	a8xx_aperture_slice_set(gpu, pipe, slice);
	val = gpu_read(gpu, offset);
	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);

	return val;
}

void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	u32 slice_mask;

	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
		return;

	if (a6xx_gpu->slice_mask)
		return;

	slice_mask = GENMASK(info->max_slices - 1, 0);

	/* GEN1 doesn't support partial slice configurations */
	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
		a6xx_gpu->slice_mask = slice_mask;
		return;
	}

	slice_mask &= a6xx_llc_read(a6xx_gpu,
			REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);

	a6xx_gpu->slice_mask = slice_mask;

	/* The chip ID depends on the number of slices available, so update it here */
	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));
}
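
/* Index of the lowest enabled slice, e.g. slice_mask == 0b0110 -> 1 */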
static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
{
	return ffs(a6xx_gpu->slice_mask) - 1;
}

static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
	    ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
		 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a8xx_check_idle(gpu))) {
		DRM_ERROR(
			"%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A8XX_RBBM_STATUS),
			gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 val;

	if (adreno_is_x285(adreno_gpu) && state)
		gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);

	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
		  state ? 0x110111 : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
		  state ? 0x55555 : 0);

	gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
	gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);

	if (state) {
		gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);

		if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
				     val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
			return;
		}

		gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
	} else {
		/*
		 * GMU enables clk gating in GBIF during boot up. So,
		 * override that here when hwcg feature is disabled
		 */
		gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
	}
}

static void a8xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	u32 cntl, final_cfg;
	unsigned int i;

	cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
	       A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
	       A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
	       A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;
	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);

	a8xx_aperture_clear(gpu);

	for (i = 0; i < protect->count; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i]) {
			gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
			final_cfg = protect->regs[i];
		}
	}

	/*
	 * The last span feature is only supported on PIPE-specific registers,
	 * so update those here
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);

	a8xx_aperture_clear(gpu);
}

static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3);
	bool rgba8888_lossless = false, fp16compoptdis = false;
	bool yuvnotcomptofc = false, min_acc_len_64b = false;
	bool rgb565_predicator = false, amsbc = false;
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	u32 ubwc_version = cfg->ubwc_enc_version;
	u32 hbb, hbb_hi, hbb_lo, mode = 1;
	u8 uavflagprd_inv = 2;

	switch (ubwc_version) {
	case UBWC_6_0:
		yuvnotcomptofc = true;
		mode = 5;
		break;
	case UBWC_5_0:
		amsbc = true;
		rgb565_predicator = true;
		mode = 4;
		break;
	case UBWC_4_0:
		amsbc = true;
		rgb565_predicator = true;
		fp16compoptdis = true;
		rgba8888_lossless = true;
		mode = 2;
		break;
	case UBWC_3_0:
		amsbc = true;
		mode = 1;
		break;
	default:
		dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
		break;
	}

	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
	 */
	WARN_ON(cfg->highest_bank_bit < 13);
	hbb = cfg->highest_bank_bit - 13;
	hbb_hi = hbb >> 2;
	hbb_lo = hbb & 3;
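	/* e.g. highest_bank_bit == 16: hbb = 3, hbb_hi = 0, hbb_lo = 3 */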

	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
			yuvnotcomptofc << 6 |
			level3_swizzling_dis << 5 |
			level2_swizzling_dis << 4 |
			hbb_hi << 3 |
			hbb_lo << 1);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
			mode << 15 |
			yuvnotcomptofc << 6 |
			rgba8888_lossless << 4 |
			fp16compoptdis << 3 |
			rgb565_predicator << 2 |
			amsbc << 1 |
			min_acc_len_64b);

	a8xx_aperture_clear(gpu);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level3_swizzling_dis << 13 |
		  level2_swizzling_dis << 12 |
		  hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level3_swizzling_dis << 7 |
		  level2_swizzling_dis << 6 |
		  hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);
}

static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
	unsigned int pipe_id, i;
	unsigned long flags;

	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		/* We don't have support for LPAC yet */
		if (pipe_id == PIPE_LPAC)
			continue;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);

		for (i = 0; regs[i].offset; i++) {
			if (!(BIT(pipe_id) & regs[i].pipe))
				continue;

			if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
				*gmem_protect = regs[i].value;

			gpu_write(gpu, regs[i].offset, regs[i].value);
		}

		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);
}

static int a8xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 4);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	a6xx_flush(gpu, ring);
	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}

#define A8XX_INT_MASK \
	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	 A6XX_RBBM_INT_0_MASK_CP_SW | \
	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

#define A8XX_APRIV_MASK \
	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

#define A8XX_BR_APRIV_MASK \
	(A8XX_APRIV_MASK | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

#define A8XX_CP_GLOBAL_INT_MASK \
	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

#define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

#define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)

static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int pipe_id, i;
	u32 gmem_protect = 0;
	u64 gmem_range_min;
	int ret;

	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	if (ret)
		return ret;

	/* Clear the cached value to force aperture configuration next time */
	a6xx_gpu->cached_aperture = UINT_MAX;
	a8xx_aperture_clear(gpu);

	/* Clear GBIF halt in case GX domain was not collapsed */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
	gpu_read(gpu, REG_A6XX_GBIF_HALT);

	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Setup GMEM Range in UCHE */
	gmem_range_min = SZ_64M;
	/* Set the GMEM VA range [gmem_range_min:gmem_range_min + gpu->gmem - 1] */
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);

	/* Setup UCHE Trap region */
	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);

	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
		  FIELD_PREP(GENMASK(7, 0), 0x4));

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);

	a8xx_set_ubwc_config(gpu);

	/* Set weights for bicubic filtering */
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);

	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	a8xx_nonctxt_config(gpu, &gmem_protect);

	/* Enable fault detection */
	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a8xx_set_cp_protect(gpu);

	/* Enable the GMEM save/restore feature for preemption */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);

	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 apriv_mask = A8XX_APRIV_MASK;
		unsigned long flags;

		if (pipe_id == PIPE_LPAC)
			continue;

		if (pipe_id == PIPE_BR)
			apriv_mask = A8XX_BR_APRIV_MASK;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);
		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
			  A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
			  A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);

	/* Enable interrupts */
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
	if (a6xx_gpu->aqe_iova)
		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);

	/* Configure the RPTR shadow if needed: */
	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));

	for (i = 0; i < gpu->nr_rings; i++)
		a6xx_gpu->shadow[i] = 0;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);

	ret = a8xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a8xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			      "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

	/*
	 * GMEM_PROTECT register should be programmed after GPU is transitioned to
	 * non-secure mode
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
	WARN_ON(!gmem_protect);
	a8xx_aperture_clear(gpu);

	/* Enable hardware clockgating */
	a8xx_set_hwcg(gpu, true);
out:
	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	return ret;
}

int a8xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}

static void a8xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
	adreno_dump(gpu);
}

void a8xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (hang_debug)
		a8xx_dump(gpu);

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */
	a6xx_gpu->hung = true;

	/* Halt SQE first */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);

	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submit won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}

static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
{
	static const char * const uche_clients[] = {
		"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
		"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
		"STCHE",
	};
	static const char * const uche_clients_lpac[] = {
		"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
	};
	u32 val;

	/*
	 * The source of the data depends on the mid ID read from FSYNR1
	 * and the client ID read from the UCHE block
	 */
	val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF);

	val &= GENMASK(6, 0);

	/* mid=3 refers to BR or BV */
	if (mid == 3) {
		if (val < ARRAY_SIZE(uche_clients))
			return uche_clients[val];
		else
			return "UCHE";
	}

	/* mid=8 refers to LPAC */
	if (mid == 8) {
		if (val < ARRAY_SIZE(uche_clients_lpac))
			return uche_clients_lpac[val];
		else
			return "UCHE_LPAC";
	}

	return "Unknown";
}

static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
{
	switch (id) {
	case 0x0:
		return "CP";
	case 0x1:
		return "UCHE: Unknown";
	case 0x2:
		return "UCHE_LPAC: Unknown";
	case 0x3:
	case 0x8:
		return a8xx_uche_fault_block(gpu, id);
	case 0x4:
		return "CCU";
	case 0x5:
		return "Flag cache";
	case 0x6:
		return "PREFETCH";
	case 0x7:
		return "GMU";
	case 0x9:
		return "UCHE_HPAC";
	}

	return "Unknown";
}

int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
{
	struct msm_gpu *gpu = arg;
	struct adreno_smmu_fault_info *info = data;
	const char *block = "unknown";

	u32 scratch[] = {
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
	};

	if (info)
		block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);

	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
}

static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 slice = a8xx_get_first_slice(a6xx_gpu);
	u32 hw_fault_mask = GENMASK(6, 0);
	u32 sw_fault_mask = GENMASK(22, 16);
	u32 pipe = 0;

	dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR |
		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
		pipe |= BIT(PIPE_BR);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV |
		      A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
		pipe |= BIT(PIPE_BV);

	if (!pipe) {
		dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
		goto out;
	}

	for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		if (!(BIT(pipe_id) & pipe))
			continue;

		if (hw_fault_mask & status) {
			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						      REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					    "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
		}

		if (sw_fault_mask & status) {
			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						      REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					    "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status);

			if (status & BIT(8)) {
				a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
							      REG_A8XX_CP_SQE_STAT_DATA_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						    "CP Opcode error, opcode=0x%x\n", status);
			}

			if (status & BIT(10)) {
				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
							      REG_A8XX_CP_PROTECT_STATUS_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						    "CP REG PROTECT error, status=0x%x\n", status);
			}
		}
	}

out:
	/* Turn off interrupts to avoid triggering recovery again */
	a8xx_aperture_clear(gpu);
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}
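
/*
 * Read a CP peripheral/debug register by programming the SQE debug address
 * and reading back the corresponding data port.
 */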
static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
{
	gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);

	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
}

static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
{
	u64 lo, hi;

	lo = gpu_periph_read(gpu, dbg_offset);
	hi = gpu_periph_read(gpu, dbg_offset + 1);

	return (hi << 32) | lo;
}

#define CP_PERIPH_IB1_BASE_LO 0x7005
#define CP_PERIPH_IB1_BASE_HI 0x7006
#define CP_PERIPH_IB1_SIZE 0x7007
#define CP_PERIPH_IB1_OFFSET 0x7008
#define CP_PERIPH_IB2_BASE_LO 0x7009
#define CP_PERIPH_IB2_BASE_HI 0x700a
#define CP_PERIPH_IB2_SIZE 0x700b
#define CP_PERIPH_IB2_OFFSET 0x700c
#define CP_PERIPH_IB3_BASE_LO 0x700d
#define CP_PERIPH_IB3_BASE_HI 0x700e
#define CP_PERIPH_IB3_SIZE 0x700f
#define CP_PERIPH_IB3_OFFSET 0x7010

static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	unsigned long flags;

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
		      ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
		      gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));

	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
		      gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
		      gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_clear(gpu);

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
{
	u32 status;

	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);

	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);

	/*
	 * Ignore FASTBLEND violations, because the HW will silently fall back
	 * to legacy blending.
	 */
	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
		timer_delete(&gpu->hangcheck_timer);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}
}

irqreturn_t a8xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);

	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);

	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a8xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
		u32 rl0, rl1;

		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
		dev_err_ratelimited(&gpu->pdev->dev,
				    "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a8xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a8xx_sw_fuse_violation_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a6xx_preempt_trigger(gpu);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
		a6xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);

		gpu_scid &= GENMASK(5, 0);

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
			  FIELD_PREP(GENMASK(29, 24), gpu_scid) |
			  FIELD_PREP(GENMASK(23, 18), gpu_scid) |
			  FIELD_PREP(GENMASK(17, 12), gpu_scid) |
			  FIELD_PREP(GENMASK(11, 6), gpu_scid) |
			  FIELD_PREP(GENMASK(5, 0), gpu_scid));

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
			  FIELD_PREP(GENMASK(27, 22), gpu_scid) |
			  FIELD_PREP(GENMASK(21, 16), gpu_scid) |
			  FIELD_PREP(GENMASK(15, 10), gpu_scid) |
			  BIT(8));
	}

	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
}

#define GBIF_CLIENT_HALT_MASK BIT(0)
#define GBIF_ARB_HALT_MASK BIT(1)
#define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0)
#define VBIF_RESET_ACK_MASK 0xF0
#define GPR0_GBIF_HALT_REQUEST 0x1E0

void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (gx_off) {
		/* Halt the gx side of GBIF */
		gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
		spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
	}

	/* Halt new client requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		    (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);

	/* Halt all AXI requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		    (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);

	/* The GBIF halt needs to be explicitly cleared */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}

int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	mutex_lock(&a6xx_gpu->gmu.lock);

	/* Force the GPU power on so we can read this register */
	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

	*value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER);

	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

	mutex_unlock(&a6xx_gpu->gmu.lock);

	return 0;
}

u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u64 busy_cycles;

	/* 19.2MHz */
	*out_sample_rate = 19200000;

	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
				 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
				 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);

	return busy_cycles;
}

bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	return true;
}