From b49814033cb5224c818cfb04dccb3260da10cc4f Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 11 Mar 2026 15:18:37 -0600 Subject: [PATCH 01/18] drm/amd/display: Fix gamma 2.2 colorop TFs Use GAMMA22 for degamma/blend and GAMMA22_INV for shaper so curves match the color pipeline. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5016 Tested-by: Xaver Hugl Reviewed-by: Melissa Wen Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit d8f9f42effd767ffa7bbcd7e05fbd6b20737e468) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index d59ba82d3d7c..aa4658867e55 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -37,19 +37,19 @@ const u64 amdgpu_dm_supported_degam_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); const u64 amdgpu_dm_supported_shaper_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); const u64 amdgpu_dm_supported_blnd_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); #define MAX_COLOR_PIPELINE_OPS 10 From 6270b1a5dab94665d7adce3dc78bc9066ed28bdd Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Thu, 12 Mar 2026 18:06:17 +0800 Subject: [PATCH 02/18] drm/amdgpu: Limit BO list entry count to prevent resource exhaustion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace can pass an arbitrary number of BO list entries via the bo_number field. Although the previous multiplication overflow check prevents out-of-bounds allocation, a large number of entries could still cause excessive memory allocation (up to potentially gigabytes) and unnecessarily long list processing times. Introduce a hard limit of 128k entries per BO list, which is more than sufficient for any realistic use case (e.g., a single list containing all buffers in a large scene). This prevents memory exhaustion attacks and ensures predictable performance. Return -EINVAL if the requested entry count exceeds the limit Reviewed-by: Christian König Suggested-by: Christian König Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 688b87d39e0aa8135105b40dc167d74b5ada5332) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 4662bfbe70b2..43864df8af04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -36,6 +36,7 @@ #define AMDGPU_BO_LIST_MAX_PRIORITY 32u #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) +#define AMDGPU_BO_LIST_MAX_ENTRIES (128 * 1024) static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { @@ -188,6 +189,9 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, const uint32_t bo_number = in->bo_number; struct drm_amdgpu_bo_list_entry *info; + if (bo_number > AMDGPU_BO_LIST_MAX_ENTRIES) + return -EINVAL; + /* copy the handle array from userspace to a kernel buffer */ if (likely(info_size == bo_info_size)) { info = vmemdup_array_user(uptr, bo_number, info_size); From 10718159890bc99cbcc7b5a38dade05df335e797 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 12 Mar 2026 10:13:34 -0700 Subject: [PATCH 03/18] drm/amd/display: Fix uninitialized variable use which breaks full LTO Commit e1b385726f7f ("drm/amd/display: Add additional checks for PSP footer size") introduced a use of an uninitialized stack variable in dm_dmub_sw_init() (region_params.bss_data_size). Interestingly, this seems to cause no issue on normal kernels. But when full LTO is enabled, it causes the compiler to "optimize" out huge swaths of amdgpu initialization code, and the driver is unusable: amdgpu 0000:03:00.0: [drm] Loading DMUB firmware via PSP: version=0x07002F00 amdgpu 0000:03:00.0: sw_init of IP block failed 5 amdgpu 0000:03:00.0: amdgpu_device_ip_init failed amdgpu 0000:03:00.0: Fatal error during GPU init It surprises me that neither gcc nor clang emit a warning about this: I only found it by bisecting the LTO breakage. Fix by using the bss_data_size field from fw_meta_info_params, as was presumably intended. Fixes: e1b385726f7f ("drm/amd/display: Add additional checks for PSP footer size") Signed-off-by: Calvin Owens Reviewed-by: Harry Wentland Reviewed-by: Nathan Chancellor Signed-off-by: Alex Deucher (cherry picked from commit b7f1402f6ad24cc6b9a01fa09ebd1c6559d787d0) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b3d6f2cd8ab6..0d1c772ef713 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2554,7 +2554,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) fw_meta_info_params.fw_inst_const = adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + PSP_HEADER_BYTES_256; - fw_meta_info_params.fw_bss_data = region_params.bss_data_size ? adev->dm.dmub_fw->data + + fw_meta_info_params.fw_bss_data = fw_meta_info_params.bss_data_size ? adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + le32_to_cpu(hdr->inst_const_bytes) : NULL; fw_meta_info_params.custom_psp_footer_size = 0; From ebe82c6e75cfc547154d0fd843b0dd6cca3d548f Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 6 Mar 2026 14:28:03 +0800 Subject: [PATCH 04/18] drm/amd/display: Wrap dcn32_override_min_req_memclk() in DC_FP_{START, END} [Why] The dcn32_override_min_req_memclk function is in dcn32_fpu.c, which is compiled with CC_FLAGS_FPU into FP instructions. So when we call it we must use DC_FP_{START,END} to save and restore the FP context, and prepare the FP unit on architectures like LoongArch where the FP unit isn't always on. Reported-by: LiarOnce Fixes: ee7be8f3de1c ("drm/amd/display: Limit DCN32 8 channel or less parts to DPM1 for FPO") Signed-off-by: Xi Ruoyao Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 25bb1d54ba3983c064361033a8ec15474fece37e) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 7ebb7d1193af..c7fd604024d6 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1785,7 +1785,10 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, enum dc_valid dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_START(); dcn32_override_min_req_memclk(dc, context); + DC_FP_END(); + dcn32_override_min_req_dcfclk(dc, context); BW_VAL_TRACE_END_WATERMARKS(); From 2323b019651ad81c20a0f7f817c63392b3110652 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 15 Mar 2026 18:30:26 +0530 Subject: [PATCH 05/18] drm/amd/display: Fix DisplayID not-found handling in parse_edid_displayid_vrr() parse_edid_displayid_vrr() searches the EDID extension blocks for a DisplayID extension before parsing the dynamic video timing range. The code previously checked whether edid_ext was NULL after the search loop. However, edid_ext is assigned during each iteration of the loop, so it will never be NULL once the loop has executed. If no DisplayID extension is found, edid_ext ends up pointing to the last extension block, and the NULL check does not correctly detect the failure case. Instead, check whether the loop completed without finding a matching DisplayID block by testing "i == edid->extensions". This ensures the function exits early when no DisplayID extension is present and avoids parsing an unrelated EDID extension block. Also simplify the EDID validation check using "!edid || !edid->extensions". Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:13079 parse_edid_displayid_vrr() warn: variable dereferenced before check 'edid_ext' (see line 13075) Fixes: a638b837d0e6 ("drm/amd/display: Fix refresh rate range for some panel") Cc: Roman Li Cc: Alex Hung Cc: Jerry Zuo Cc: Sun peng Li Cc: Tom Chung Cc: Dan Carpenter Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher (cherry picked from commit 91c7e6342e98c846b259c57273436fdea4c043f2) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0d1c772ef713..085cc98bd875 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -13119,7 +13119,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, u16 min_vfreq; u16 max_vfreq; - if (edid == NULL || edid->extensions == 0) + if (!edid || !edid->extensions) return; /* Find DisplayID extension */ @@ -13129,7 +13129,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, break; } - if (edid_ext == NULL) + if (i == edid->extensions) return; while (j < EDID_LENGTH) { From 39f44f54afa58661ecae9c27e15f5dbce2372892 Mon Sep 17 00:00:00 2001 From: Andy Nguyen Date: Sun, 15 Mar 2026 17:51:47 +0100 Subject: [PATCH 06/18] drm/amd: fix dcn 2.01 check The ASICREV_IS_BEIGE_GOBY_P check always took precedence, because it includes all chip revisions upto NV_UNKNOWN. Fixes: 54b822b3eac3 ("drm/amd/display: Use dce_version instead of chip_id") Signed-off-by: Andy Nguyen Signed-off-by: Alex Deucher (cherry picked from commit 9c7be0efa6f0daa949a5f3e3fdf9ea090b0713cb) --- drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 08d0e05a313e..d237d7b41dfd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -255,6 +255,10 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p BREAK_TO_DEBUGGER(); return NULL; } + if (ctx->dce_version == DCN_VERSION_2_01) { + dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + return &clk_mgr->base; + } if (ASICREV_IS_SIENNA_CICHLID_P(asic_id.hw_internal_rev)) { dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; @@ -267,10 +271,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; } - if (ctx->dce_version == DCN_VERSION_2_01) { - dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); - return &clk_mgr->base; - } dcn20_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; } From 0b26edac4ac5535df1f63e6e8ab44c24fe1acad7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Mar 2026 17:22:43 -0500 Subject: [PATCH 07/18] drm/amdgpu/mmhub2.0: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit e064cef4b53552602bb6ac90399c18f662f3cacd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index a0cc8e218ca1..534cb4c544dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -154,14 +154,17 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 0, 0): case IP_VERSION(2, 0, 2): - mmhub_cid = mmhub_client_ids_navi1x[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_navi1x) ? + mmhub_client_ids_navi1x[cid][rw] : NULL; break; case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): - mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_sienna_cichlid) ? + mmhub_client_ids_sienna_cichlid[cid][rw] : NULL; break; case IP_VERSION(2, 1, 2): - mmhub_cid = mmhub_client_ids_beige_goby[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_beige_goby) ? + mmhub_client_ids_beige_goby[cid][rw] : NULL; break; default: mmhub_cid = NULL; From a54403a534972af5d9ba5aaa3bb6ead612500ec6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Mar 2026 17:24:10 -0500 Subject: [PATCH 08/18] drm/amdgpu/mmhub2.3: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit 89cd90375c19fb45138990b70e9f4ba4806f05c4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 5eb8122e2746..ceb2f6b46de5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -94,7 +94,8 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): case IP_VERSION(2, 4, 1): - mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vangogh) ? + mmhub_client_ids_vangogh[cid][rw] : NULL; break; default: mmhub_cid = NULL; From 5d4e88bcfef29569a1db224ef15e28c603666c6d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Mar 2026 17:24:35 -0500 Subject: [PATCH 09/18] drm/amdgpu/mmhub3.0.1: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit 5f76083183363c4528a4aaa593f5d38c28fe7d7b) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 910337dc28d1..14a742d3a99d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -117,7 +117,8 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_1) ? + mmhub_client_ids_v3_0_1[cid][rw] : NULL; break; default: mmhub_cid = NULL; From e5e6d67b1ce9764e67aef2d0eef9911af53ad99a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Mar 2026 17:25:09 -0500 Subject: [PATCH 10/18] drm/amdgpu/mmhub3.0.2: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit 1441f52c7f6ae6553664aa9e3e4562f6fc2fe8ea) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index f0f182f033b9..e1f07f2a1852 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -108,7 +108,8 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_2) ? + mmhub_client_ids_v3_0_2[cid][rw] : NULL; dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", From cdb82ecbeccb55fae75a3c956b605f7801a30db1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Mar 2026 17:25:30 -0500 Subject: [PATCH 11/18] drm/amdgpu/mmhub3.0: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit f14f27bbe2a3ed7af32d5f6eaf3f417139f45253) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 7d5242df58a5..ab966e69a342 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -110,7 +110,8 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_0) ? + mmhub_client_ids_v3_0_0[cid][rw] : NULL; break; default: mmhub_cid = NULL; From 3cdd405831d8cc50a5eae086403402697bb98a4a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Mar 2026 17:25:56 -0500 Subject: [PATCH 12/18] drm/amdgpu/mmhub4.1.0: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit 04f063d85090f5dd0c671010ce88ee49d9dcc8ed) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 951998454b25..88bfe321f83a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -102,7 +102,8 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, status); switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 1, 0): - mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_1_0) ? + mmhub_client_ids_v4_1_0[cid][rw] : NULL; break; default: mmhub_cid = NULL; From 9c52f49545478aa47769378cd0b53c5005d6a846 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Mar 2026 17:26:17 -0500 Subject: [PATCH 13/18] drm/amdgpu/mmhub4.2.0: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit dea5f235baf3786bfd4fd920b03c19285fdc3d9f) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c index a72770e3d0e9..2532ca80f735 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c @@ -688,7 +688,8 @@ mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, status); switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 2, 0): - mmhub_cid = mmhub_client_ids_v4_2_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_2_0) ? + mmhub_client_ids_v4_2_0[cid][rw] : NULL; break; default: mmhub_cid = NULL; From f39e1270277f4b06db0b2c6ec9405b6dd766fb13 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Mar 2026 15:51:08 -0400 Subject: [PATCH 14/18] drm/amdgpu/gmc9.0: add bounds checking for cid The value should never exceed the array size as those are the only values the hardware is expected to return, but add checks anyway. Cc: Benjamin Cheng Reviewed-by: Benjamin Cheng Signed-off-by: Alex Deucher (cherry picked from commit e14d468304832bcc4a082d95849bc0a41b18ddea) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e35ed0cc2ec6..8eba99aa0f8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -662,28 +662,35 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } else { switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(9, 0, 0): - mmhub_cid = mmhub_client_ids_vega10[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega10) ? + mmhub_client_ids_vega10[cid][rw] : NULL; break; case IP_VERSION(9, 3, 0): - mmhub_cid = mmhub_client_ids_vega12[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega12) ? + mmhub_client_ids_vega12[cid][rw] : NULL; break; case IP_VERSION(9, 4, 0): - mmhub_cid = mmhub_client_ids_vega20[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega20) ? + mmhub_client_ids_vega20[cid][rw] : NULL; break; case IP_VERSION(9, 4, 1): - mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_arcturus) ? + mmhub_client_ids_arcturus[cid][rw] : NULL; break; case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 0): - mmhub_cid = mmhub_client_ids_raven[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_raven) ? + mmhub_client_ids_raven[cid][rw] : NULL; break; case IP_VERSION(1, 5, 0): case IP_VERSION(2, 4, 0): - mmhub_cid = mmhub_client_ids_renoir[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_renoir) ? + mmhub_client_ids_renoir[cid][rw] : NULL; break; case IP_VERSION(1, 8, 0): case IP_VERSION(9, 4, 2): - mmhub_cid = mmhub_client_ids_aldebaran[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_aldebaran) ? + mmhub_client_ids_aldebaran[cid][rw] : NULL; break; default: mmhub_cid = NULL; From 3fc4648b53b7e393b91e63600e28e6f25c8ef0c5 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Wed, 11 Mar 2026 12:15:09 -0400 Subject: [PATCH 15/18] drm/amdgpu: Fix ISP segfault issue in kernel v7.0 Add NULL pointer checks for dev->type before accessing dev->type->name in ISP genpd add/remove functions to prevent kernel crashes. This regression was introduced in v7.0 as the wakeup sources are registered using physical device instead of ACPI device. This led to adding wakeup source device as the first child of AMDGPU device without initializing dev-type variable, and resulted in segfault when accessed it in the amdgpu isp driver. Fixes: 057edc58aa59 ("ACPI: PM: Register wakeup sources under physical devices") Suggested-by: Bin Du Reviewed-by: Mario Limonciello Signed-off-by: Pratap Nirujogi Signed-off-by: Alex Deucher (cherry picked from commit c51632d1ed7ac5aed2d40dbc0718d75342c12c6a) --- drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index b3590b33cab9..485ecdec9618 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -129,7 +129,7 @@ static int isp_genpd_add_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to add\n"); goto exit; } @@ -165,7 +165,7 @@ static int isp_genpd_remove_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to remove\n"); goto exit; } From e9f58ff991dd4be13fd7a651bbf64329c090af09 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Mar 2026 11:04:46 -0400 Subject: [PATCH 16/18] drm/amdgpu: rework how we handle TLB fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new VM flag to indicate whether or not we need a TLB fence. Userqs (KFD or KGD) require a TLB fence. A TLB fence is not strictly required for kernel queues, but it shouldn't hurt. That said, enabling this unconditionally should be fine, but it seems to tickle some issues in KIQ/MES. Only enable them for KFD, or when KGD userq queues are enabled (currently via module parameter). Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4798 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4749 Fixes: f3854e04b708 ("drm/amdgpu: attach tlb fence to the PTs update") Cc: Christian König Cc: Prike Liang Reviewed-by: Prike Liang Signed-off-by: Alex Deucher (cherry picked from commit 69c5fbd2b93b5ced77c6e79afe83371bca84c788) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f2beb980e3c3..c60cbce356cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1069,7 +1069,10 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, } /* Prepare a TLB flush fence to be attached to PTs */ - if (!params->unlocked) { + /* The check for need_tlb_fence should be dropped once we + * sort out the issues with KIQ/MES TLB invalidation timeouts. + */ + if (!params->unlocked && vm->need_tlb_fence) { amdgpu_vm_tlb_fence_create(params->adev, vm, fence); /* Makes sure no PD/PT is freed before the flush */ @@ -2602,6 +2605,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ttm_lru_bulk_move_init(&vm->lru_bulk_move); vm->is_compute_context = false; + vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev); vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); @@ -2739,6 +2743,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update); vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; + vm->need_tlb_fence = true; unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 806d62ed61ef..bb276c0ad06d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -441,6 +441,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* Flag to indicate if VM is used for compute */ bool is_compute_context; + /* Flag to indicate if VM needs a TLB fence (KFD or KGD) */ + bool need_tlb_fence; /* Memory partition number, -1 means any partition */ int8_t mem_id; From 9787f7da186ee8143b7b6d914cfa0b6e7fee2648 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 25 Sep 2023 10:44:07 -0400 Subject: [PATCH 17/18] drm/amdgpu: apply state adjust rules to some additional HAINAN vairants They need a similar workaround. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1839 Signed-off-by: Alex Deucher (cherry picked from commit 0de31d92a173d3d94f28051b0b80a6c98913aed4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 61b1c5aa74cb..36942467d4ad 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3454,9 +3454,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, if (adev->asic_type == CHIP_HAINAN) { if ((adev->pdev->revision == 0x81) || (adev->pdev->revision == 0xC3) || + (adev->pdev->device == 0x6660) || (adev->pdev->device == 0x6664) || (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)) { + (adev->pdev->device == 0x6667) || + (adev->pdev->device == 0x666F)) { max_sclk = 75000; } if ((adev->pdev->revision == 0xC3) || From 86650ee2241ff84207eaa298ab318533f3c21a38 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 25 Sep 2023 10:44:06 -0400 Subject: [PATCH 18/18] drm/radeon: apply state adjust rules to some additional HAINAN vairants They need a similar workaround. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1839 Signed-off-by: Alex Deucher (cherry picked from commit 87327658c848f56eac166cb382b57b83bf06c5ac) Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index b4aa49b1ac63..4b10715f951c 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2915,9 +2915,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (rdev->family == CHIP_HAINAN) { if ((rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0xC3) || + (rdev->pdev->device == 0x6660) || (rdev->pdev->device == 0x6664) || (rdev->pdev->device == 0x6665) || - (rdev->pdev->device == 0x6667)) { + (rdev->pdev->device == 0x6667) || + (rdev->pdev->device == 0x666F)) { max_sclk = 75000; } if ((rdev->pdev->revision == 0xC3) ||