mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

kernel panic caused by RAS records exceeding the threshold when loading the driver with RMA specified (bad_page_threshold=128):

1. Fix the warnings caused by disabling the interrupt source before it was enabled
2. Fix the kernel panic when the xcp sysfs is not initialized and a NULL pointer is dereferenced during fini
3. Fix the memory leak caused by the device's early exit due to RMA

The first reason:

[ 2744.246650] ------------[ cut here ]------------
[ 2744.246651] WARNING: CPU: 0 PID: 289 at /tmp/amd.BkfTLqYV/amd/amdgpu/amdgpu_irq.c:635 amdgpu_irq_put.cold+0x42/0x6e [amdgpu]
[ 2744.247108] Modules linked in: amdgpu(OE+) amddrm_ttm_helper(OE) amdttm(OE) amdxcp(OE) amddrm_buddy(OE) amddrm_exec(OE) amd_sched(OE) amdkcl(OE) xt_conntrack nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack_netlink nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xfrm_user xfrm_algo nft_counter xt_addrtype nft_compat nf_tables nfnetlink br_netfilter bridge stp llc overlay binfmt_misc intel_rapl_msr intel_rapl_common i10nm_edac nfit x86_pkg_temp_thermal intel_powerclamp coretemp ipmi_ssif kvm_intel nls_iso8859_1 kvm rapl isst_if_mbox_pci isst_if_mmio pmt_telemetry pmt_crashlog isst_if_common pmt_class mei_me mei acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad mac_hid sch_fq_codel dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua msr ramoops reed_solomon pstore_blk pstore_zone efi_pstore ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath
[ 2744.247167] linear mlx5_ib ib_uverbs ib_core ast i2c_algo_bit drm_vram_helper drm_ttm_helper ttm drm_kms_helper crct10dif_pclmul syscopyarea crc32_pclmul ghash_clmulni_intel mlx5_core sysfillrect sysimgblt aesni_intel mlxfw fb_sys_fops psample cec crypto_simd cryptd rc_core i2c_i801 nvme xhci_pci tls intel_pmt drm pci_hyperv_intf nvme_core i2c_smbus i2c_ismt xhci_pci_renesas wmi pinctrl_emmitsburg
[ 2744.247194] CPU: 0 PID: 289 Comm: kworker/0:1 Tainted: G OE 5.15.0-70-generic #77-Ubuntu
[ 2744.247197] Hardware name: Microsoft C278A/C278A, BIOS C2789.5.BS.1C23.AG.2 11/21/2024
[ 2744.247198] Workqueue: events work_for_cpu_fn
[ 2744.247206] RIP: 0010:amdgpu_irq_put.cold+0x42/0x6e [amdgpu]
[ 2744.247634] Code: 79 7f ff 44 89 ee 48 c7 c7 4d 5a 42 c2 89 55 d4 e8 90 09 bc bf 8b 55 d4 4c 89 e6 4c 89 ff e8 3c 76 7f ff 8b 55 d4 84 c0 75 07 <0f> 0b e9 95 79 7f ff 49 03 5c 24 08 f0 ff 0b 75 13 4c 89 e6 4c 89
[ 2744.247636] RSP: 0018:ffa0000019e27cb0 EFLAGS: 00010246
[ 2744.247639] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ff11000150fa87c0
[ 2744.247641] RDX: 0000000000000000 RSI: ffffffffc2222430 RDI: ff1100019f200000
[ 2744.247642] RBP: ffa0000019e27ce0 R08: 0000000000000003 R09: ffffffffffe41a08
[ 2744.247643] R10: 0000000000ffff0a R11: 0000000000000001 R12: ff1100019f22ce60
[ 2744.247644] R13: 0000000000000000 R14: 00000000ffffffea R15: ff1100019f200000
[ 2744.247645] FS: 0000000000000000(0000) GS:ff11007e7e400000(0000) knlGS:0000000000000000
[ 2744.247647] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2744.247649] CR2: 00007f3d2002819c CR3: 0000000006810003 CR4: 0000000000771ef0
[ 2744.247650] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2744.247651] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[ 2744.247652] PKRU: 55555554
[ 2744.247653] Call Trace:
[ 2744.247654] <TASK>
[ 2744.247656] sdma_v4_4_2_hw_fini+0x7a/0xc0 [amdgpu]
[ 2744.247997] ? vcn_v4_0_3_hw_fini+0x5f/0xa0 [amdgpu]
[ 2744.248336] amdgpu_ip_block_hw_fini+0x31/0x61 [amdgpu]
[ 2744.248776] amdgpu_device_fini_hw+0x3bb/0x47b [amdgpu]
[ 2744.249197] ? blocking_notifier_chain_unregister+0x56/0xb0
[ 2744.249202] amdgpu_driver_unload_kms+0x51/0x60 [amdgpu]
[ 2744.249482] amdgpu_driver_load_kms.cold+0x18/0x2e [amdgpu]
[ 2744.249913] amdgpu_pci_probe+0x23e/0x590 [amdgpu]
[ 2744.250187] local_pci_probe+0x48/0x90
[ 2744.250191] work_for_cpu_fn+0x17/0x30
[ 2744.250196] process_one_work+0x228/0x3d0
[ 2744.250198] worker_thread+0x223/0x420
[ 2744.250200] ? process_one_work+0x3d0/0x3d0
[ 2744.250201] kthread+0x127/0x150
[ 2744.250204] ? set_kthread_struct+0x50/0x50
[ 2744.250207] ret_from_fork+0x1f/0x30
[ 2744.250212] </TASK>
[ 2744.250213] ---[ end trace 488c997a88508bc3 ]---

The second reason:

[ 5139.303446] Memory manager not clean during takedown.
[ 5139.303509] WARNING: CPU: 145 PID: 117699 at drivers/gpu/drm/drm_mm.c:998 drm_mm_takedown+0x27/0x30 [drm]
[ 5139.303542] Modules linked in: amdgpu(OE+) amddrm_ttm_helper(OE) amdttm(OE) amdxcp(OE) amddrm_buddy(OE) amddrm_exec(OE) amd_sched(OE) amdkcl(OE) xt_conntrack nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack_netlink nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xfrm_user xfrm_algo nft_counter xt_addrtype nft_compat nf_tables nfnetlink br_netfilter bridge stp llc overlay intel_rapl_msr intel_rapl_common i10nm_edac nfit x86_pkg_temp_thermal intel_powerclamp coretemp ipmi_ssif kvm_intel binfmt_misc kvm nls_iso8859_1 rapl isst_if_mbox_pci pmt_telemetry pmt_crashlog isst_if_mmio pmt_class isst_if_common mei_me mei acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad acpi_power_meter mac_hid sch_fq_codel dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua msr ramoops reed_solomon pstore_blk pstore_zone efi_pstore ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath
[ 5139.303572] linear mlx5_ib ib_uverbs ib_core crct10dif_pclmul ast crc32_pclmul i2c_algo_bit ghash_clmulni_intel aesni_intel crypto_simd drm_vram_helper cryptd drm_ttm_helper mlx5_core ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec rc_core mlxfw psample intel_pmt nvme xhci_pci drm tls i2c_i801 pci_hyperv_intf nvme_core i2c_smbus i2c_ismt xhci_pci_renesas wmi pinctrl_emmitsburg [last unloaded: amdkcl]
[ 5139.303588] CPU: 145 PID: 117699 Comm: modprobe Tainted: G U OE 5.15.0-70-generic #77-Ubuntu
[ 5139.303590] Hardware name: Microsoft C278A/C278A, BIOS C2789.5.BS.1C23.AG.2 11/21/2024
[ 5139.303591] RIP: 0010:drm_mm_takedown+0x27/0x30 [drm]
[ 5139.303605] Code: cc 66 90 0f 1f 44 00 00 48 8b 47 38 48 83 c7 38 48 39 f8 75 05 c3 cc cc cc cc 55 48 c7 c7 18 d0 10 c0 48 89 e5 e8 5a bc c3 c1 <0f> 0b 5d c3 cc cc cc cc 90 0f 1f 44 00 00 55 b9 15 00 00 00 48 89
[ 5139.303607] RSP: 0018:ffa00000325c3940 EFLAGS: 00010286
[ 5139.303608] RAX: 0000000000000000 RBX: ff1100012f5cecb0 RCX: 0000000000000027
[ 5139.303609] RDX: ff11007e7fa60588 RSI: 0000000000000001 RDI: ff11007e7fa60580
[ 5139.303610] RBP: ffa00000325c3940 R08: 0000000000000003 R09: fffffffff00c2b78
[ 5139.303610] R10: 000000000000002b R11: 0000000000000001 R12: ff1100012f5cec00
[ 5139.303611] R13: ff1100012138f068 R14: 0000000000000000 R15: ff1100012f5cec90
[ 5139.303611] FS: 00007f42ffca0000(0000) GS:ff11007e7fa40000(0000) knlGS:0000000000000000
[ 5139.303612] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5139.303613] CR2: 00007f23d945ab68 CR3: 00000001212ce005 CR4: 0000000000771ee0
[ 5139.303614] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 5139.303615] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[ 5139.303615] PKRU: 55555554
[ 5139.303616] Call Trace:
[ 5139.303617] <TASK>
[ 5139.303619] amdttm_range_man_fini_nocheck+0xfe/0x1c0 [amdttm]
[ 5139.303625] amdgpu_ttm_fini+0x2ed/0x390 [amdgpu]
[ 5139.303800] amdgpu_bo_fini+0x27/0xc0 [amdgpu]
[ 5139.303959] gmc_v9_0_sw_fini+0x63/0x90 [amdgpu]
[ 5139.304144] amdgpu_device_fini_sw+0x125/0x6a0 [amdgpu]
[ 5139.304302] amdgpu_driver_release_kms+0x16/0x30 [amdgpu]
[ 5139.304455] devm_drm_dev_init_release+0x4a/0x80 [drm]
[ 5139.304472] devm_action_release+0x12/0x20
[ 5139.304476] release_nodes+0x3d/0xb0
[ 5139.304478] devres_release_all+0x9b/0xd0
[ 5139.304480] really_probe+0x11d/0x420
[ 5139.304483] __driver_probe_device+0x119/0x190
[ 5139.304485] driver_probe_device+0x23/0xc0
[ 5139.304487] __driver_attach+0xf7/0x1f0
[ 5139.304489] ? __device_attach_driver+0x140/0x140
[ 5139.304491] bus_for_each_dev+0x7c/0xd0
[ 5139.304493] driver_attach+0x1e/0x30
[ 5139.304494] bus_add_driver+0x148/0x220
[ 5139.304496] driver_register+0x95/0x100
[ 5139.304498] __pci_register_driver+0x68/0x70
[ 5139.304500] amdgpu_init+0xbc/0x1000 [amdgpu]
[ 5139.304655] ? 0xffffffffc0b8f000
[ 5139.304657] do_one_initcall+0x46/0x1e0
[ 5139.304659] ? kmem_cache_alloc_trace+0x19e/0x2e0
[ 5139.304663] do_init_module+0x52/0x260
[ 5139.304665] load_module+0xb2b/0xbc0
[ 5139.304667] __do_sys_finit_module+0xbf/0x120
[ 5139.304669] __x64_sys_finit_module+0x18/0x20
[ 5139.304670] do_syscall_64+0x59/0xc0
[ 5139.304673] ? exit_to_user_mode_prepare+0x37/0xb0
[ 5139.304676] ? syscall_exit_to_user_mode+0x27/0x50
[ 5139.304678] ? __x64_sys_mmap+0x33/0x50
[ 5139.304680] ? do_syscall_64+0x69/0xc0
[ 5139.304681] entry_SYSCALL_64_after_hwframe+0x61/0xcb
[ 5139.304684] RIP: 0033:0x7f42ffdbf88d
[ 5139.304686] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 b5 0f 00 f7 d8 64 89 01 48
[ 5139.304687] RSP: 002b:00007ffcb7427158 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[ 5139.304688] RAX: ffffffffffffffda RBX: 000055ce8b8f3150 RCX: 00007f42ffdbf88d
[ 5139.304689] RDX: 0000000000000000 RSI: 000055ce8b8f9a70 RDI: 000000000000000a
[ 5139.304690] RBP: 0000000000040000 R08: 0000000000000000 R09: 0000000000000011
[ 5139.304690] R10: 000000000000000a R11: 0000000000000246 R12: 000055ce8b8f9a70
[ 5139.304691] R13: 000055ce8b8f2ec0 R14: 000055ce8b8f2ab0 R15: 000055ce8b8f9aa0
[ 5139.304692] </TASK>
[ 5139.304693] ---[ end trace 8536b052f7883003 ]---

Signed-off-by: Ce Sun <cesun102@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
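The three fixes share one teardown-ordering idea: only undo what was actually set up, and still undo everything that was set up before an early exit. Below is a minimal user-space C sketch of that pattern; the `toy_dev` type and every function and field name are illustrative inventions for this page, not the actual amdgpu patch.

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy device model: only the three bits of state the fixes care about.
 * All names here are hypothetical, not amdgpu's. */
struct toy_dev {
	bool irq_enabled;   /* set only after the IRQ source is enabled */
	void *xcp_cfg;      /* stays NULL if xcp sysfs init never ran */
	void *vram;         /* resource that must be freed on early exit */
};

/* Fix 1 pattern: never "put" an interrupt source that was never enabled,
 * which is what tripped the amdgpu_irq_put() WARN in the first trace. */
static void toy_irq_fini(struct toy_dev *d)
{
	if (!d->irq_enabled)
		return;            /* nothing was enabled; skip the put */
	d->irq_enabled = false;
}

/* Fix 2 pattern: tolerate fini when init never ran, instead of
 * dereferencing a NULL pointer during teardown. */
static void toy_xcp_sysfs_fini(struct toy_dev *d)
{
	if (!d->xcp_cfg)
		return;            /* sysfs was never initialized */
	free(d->xcp_cfg);
	d->xcp_cfg = NULL;
}

/* Fix 3 pattern: an early (RMA) exit must still release everything
 * already allocated, or the allocation leaks. */
static void toy_early_exit(struct toy_dev *d)
{
	toy_irq_fini(d);
	toy_xcp_sysfs_fini(d);
	free(d->vram);
	d->vram = NULL;
}

int main(void)
{
	/* Probe aborted early: vram allocated, irq and sysfs never set up. */
	struct toy_dev d = { .vram = malloc(16) };

	toy_early_exit(&d);        /* no spurious WARN, no NULL deref, no leak */
	puts("teardown ok");
	return 0;
}
```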
725 lines
18 KiB
C
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "amdgpu.h"
|
|
#include "amdgpu_xcp.h"
|
|
#include "amdgpu_drv.h"
|
|
|
|
#include <drm/drm_drv.h>
|
|
#include "../amdxcp/amdgpu_xcp_drv.h"
|
|
|
|
static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
			    struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
{
	int (*run_func)(void *handle, uint32_t inst_mask);
	int ret = 0;

	if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
		return 0;

	run_func = NULL;

	switch (xcp_state) {
	case AMDGPU_XCP_PREPARE_SUSPEND:
		run_func = xcp_ip->ip_funcs->prepare_suspend;
		break;
	case AMDGPU_XCP_SUSPEND:
		run_func = xcp_ip->ip_funcs->suspend;
		break;
	case AMDGPU_XCP_PREPARE_RESUME:
		run_func = xcp_ip->ip_funcs->prepare_resume;
		break;
	case AMDGPU_XCP_RESUME:
		run_func = xcp_ip->ip_funcs->resume;
		break;
	}

	if (run_func)
		ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);

	return ret;
}

static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
				     int state)
{
	struct amdgpu_xcp_ip *xcp_ip;
	struct amdgpu_xcp *xcp;
	int i, ret;

	if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
		return -EINVAL;

	xcp = &xcp_mgr->xcp[xcp_id];
	for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
		xcp_ip = &xcp->ip[i];
		ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
		if (ret)
			break;
	}

	return ret;
}

int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
	return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
					 AMDGPU_XCP_PREPARE_SUSPEND);
}

int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
	return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
}

int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
	return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
					 AMDGPU_XCP_PREPARE_RESUME);
}

int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
{
	return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
}

static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
				   struct amdgpu_xcp_ip *ip)
{
	struct amdgpu_xcp *xcp;

	if (!ip)
		return;

	xcp = &xcp_mgr->xcp[xcp_id];
	xcp->ip[ip->ip_id] = *ip;
	xcp->ip[ip->ip_id].valid = true;

	xcp->valid = true;
}

int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
{
	struct amdgpu_device *adev = xcp_mgr->adev;
	struct amdgpu_xcp_ip ip;
	uint8_t mem_id;
	int i, j, ret;

	if (!num_xcps || num_xcps > MAX_XCP)
		return -EINVAL;

	xcp_mgr->mode = mode;

	for (i = 0; i < MAX_XCP; ++i)
		xcp_mgr->xcp[i].valid = false;

	/* This is needed for figuring out memory id of xcp */
	xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;

	for (i = 0; i < num_xcps; ++i) {
		for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
			ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
							     &ip);
			if (ret)
				continue;

			__amdgpu_xcp_add_block(xcp_mgr, i, &ip);
		}

		xcp_mgr->xcp[i].id = i;

		if (xcp_mgr->funcs->get_xcp_mem_id) {
			ret = xcp_mgr->funcs->get_xcp_mem_id(
				xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
			if (ret)
				continue;
			else
				xcp_mgr->xcp[i].mem_id = mem_id;
		}
	}

	xcp_mgr->num_xcps = num_xcps;
	amdgpu_xcp_update_partition_sched_list(adev);

	return 0;
}

static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
					      int mode)
{
	int ret, curr_mode, num_xcps = 0;

	if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
		return 0;

	mutex_lock(&xcp_mgr->xcp_lock);

	curr_mode = xcp_mgr->mode;
	/* State set to transient mode */
	xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;

	ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);

	if (ret) {
		/* Failed, get whatever mode it's at now */
		if (xcp_mgr->funcs->query_partition_mode)
			xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
				xcp_mgr, AMDGPU_XCP_FL_LOCKED);
		else
			xcp_mgr->mode = curr_mode;

		goto out;
	}

out:
	mutex_unlock(&xcp_mgr->xcp_lock);

	return ret;
}

int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
{
	if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
		return -EINVAL;

	if (xcp_mgr->mode == mode)
		return 0;

	return __amdgpu_xcp_switch_partition_mode(xcp_mgr, mode);
}

int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
	if (!xcp_mgr || xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
		return 0;

	return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode);
}

int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
{
	int mode;

	if (!amdgpu_sriov_vf(xcp_mgr->adev) &&
	    xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
		return xcp_mgr->mode;

	if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
		return xcp_mgr->mode;

	if (!(flags & AMDGPU_XCP_FL_LOCKED))
		mutex_lock(&xcp_mgr->xcp_lock);
	mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);

	/* First time query for VF, set the mode here */
	if (amdgpu_sriov_vf(xcp_mgr->adev) &&
	    xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
		xcp_mgr->mode = mode;

	if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
		dev_WARN(
			xcp_mgr->adev->dev,
			"Cached partition mode %d not matching with device mode %d",
			xcp_mgr->mode, mode);

	if (!(flags & AMDGPU_XCP_FL_LOCKED))
		mutex_unlock(&xcp_mgr->xcp_lock);

	return mode;
}

static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
{
	struct drm_device *p_ddev;
	struct drm_device *ddev;
	int i, ret;

	ddev = adev_to_drm(adev);

	/* xcp #0 shares drm device setting with adev */
	adev->xcp_mgr->xcp->ddev = ddev;

	for (i = 1; i < MAX_XCP; i++) {
		ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
		if (ret == -ENOSPC) {
			dev_warn(adev->dev,
				 "Skip xcp node #%d when out of drm node resource.", i);
			return 0;
		} else if (ret) {
			return ret;
		}

		/* Redirect all IOCTLs to the primary device */
		adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
		adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
		adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
		adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
		p_ddev->render->dev = ddev;
		p_ddev->primary->dev = ddev;
		p_ddev->vma_offset_manager = ddev->vma_offset_manager;
		p_ddev->driver = &amdgpu_partition_driver;
		adev->xcp_mgr->xcp[i].ddev = p_ddev;
	}

	return 0;
}

int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
			int init_num_xcps,
			struct amdgpu_xcp_mgr_funcs *xcp_funcs)
{
	struct amdgpu_xcp_mgr *xcp_mgr;

	if (!xcp_funcs || !xcp_funcs->get_ip_details)
		return -EINVAL;

	xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);

	if (!xcp_mgr)
		return -ENOMEM;

	xcp_mgr->adev = adev;
	xcp_mgr->funcs = xcp_funcs;
	xcp_mgr->mode = init_mode;
	mutex_init(&xcp_mgr->xcp_lock);

	if (init_mode != AMDGPU_XCP_MODE_NONE)
		amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);

	adev->xcp_mgr = xcp_mgr;

	return amdgpu_xcp_dev_alloc(adev);
}

int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
			     enum AMDGPU_XCP_IP_BLOCK ip, int instance)
{
	struct amdgpu_xcp *xcp;
	int i, id_mask = 0;

	if (ip >= AMDGPU_XCP_MAX_BLOCKS)
		return -EINVAL;

	for (i = 0; i < xcp_mgr->num_xcps; ++i) {
		xcp = &xcp_mgr->xcp[i];
		if ((xcp->valid) && (xcp->ip[ip].valid) &&
		    (xcp->ip[ip].inst_mask & BIT(instance)))
			id_mask |= BIT(i);
	}

	if (!id_mask)
		id_mask = -ENXIO;

	return id_mask;
}

int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
				enum AMDGPU_XCP_IP_BLOCK ip,
				uint32_t *inst_mask)
{
	if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
		return -EINVAL;

	*inst_mask = xcp->ip[ip].inst_mask;

	return 0;
}

int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
			    const struct pci_device_id *ent)
{
	int i, ret;

	if (!adev->xcp_mgr)
		return 0;

	for (i = 1; i < MAX_XCP; i++) {
		if (!adev->xcp_mgr->xcp[i].ddev)
			break;

		ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
		if (ret)
			return ret;
	}

	return 0;
}

void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
{
	struct drm_device *p_ddev;
	int i;

	if (!adev->xcp_mgr)
		return;

	for (i = 1; i < MAX_XCP; i++) {
		if (!adev->xcp_mgr->xcp[i].ddev)
			break;

		p_ddev = adev->xcp_mgr->xcp[i].ddev;
		drm_dev_unplug(p_ddev);
		p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
		p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
		p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
		p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
	}
}

int amdgpu_xcp_open_device(struct amdgpu_device *adev,
			   struct amdgpu_fpriv *fpriv,
			   struct drm_file *file_priv)
{
	int i;

	if (!adev->xcp_mgr)
		return 0;

	fpriv->xcp_id = AMDGPU_XCP_NO_PARTITION;
	for (i = 0; i < MAX_XCP; ++i) {
		if (!adev->xcp_mgr->xcp[i].ddev)
			break;

		if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) {
			if (adev->xcp_mgr->xcp[i].valid == FALSE) {
				dev_err(adev->dev, "renderD%d partition %d not valid!",
					file_priv->minor->index, i);
				return -ENOENT;
			}
			dev_dbg(adev->dev, "renderD%d partition %d opened!",
				file_priv->minor->index, i);
			fpriv->xcp_id = i;
			break;
		}
	}

	fpriv->vm.mem_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ? -1 :
			   adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
	return 0;
}

void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
			      struct amdgpu_ctx_entity *entity)
{
	struct drm_gpu_scheduler *sched;
	struct amdgpu_ring *ring;

	if (!adev->xcp_mgr)
		return;

	sched = entity->entity.rq->sched;
	if (drm_sched_wqueue_ready(sched)) {
		ring = to_amdgpu_ring(entity->entity.rq->sched);
		atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
	}
}

#define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name)                         \
	static ssize_t amdgpu_xcp_res_sysfs_##_name##_show(        \
		struct amdgpu_xcp_res_details *xcp_res, char *buf) \
	{                                                          \
		return sysfs_emit(buf, "%d\n", xcp_res->_name);    \
	}

struct amdgpu_xcp_res_sysfs_attribute {
	struct attribute attr;
	ssize_t (*show)(struct amdgpu_xcp_res_details *xcp_res, char *buf);
};

#define XCP_CFG_SYSFS_RES_ATTR(_name)                                        \
	struct amdgpu_xcp_res_sysfs_attribute xcp_res_sysfs_attr_##_name = { \
		.attr = { .name = __stringify(_name), .mode = 0400 },        \
		.show = amdgpu_xcp_res_sysfs_##_name##_show,                 \
	}

XCP_CFG_SYSFS_RES_ATTR_SHOW(num_inst)
XCP_CFG_SYSFS_RES_ATTR(num_inst);
XCP_CFG_SYSFS_RES_ATTR_SHOW(num_shared)
XCP_CFG_SYSFS_RES_ATTR(num_shared);

#define XCP_CFG_SYSFS_RES_ATTR_PTR(_name) xcp_res_sysfs_attr_##_name.attr

static struct attribute *xcp_cfg_res_sysfs_attrs[] = {
	&XCP_CFG_SYSFS_RES_ATTR_PTR(num_inst),
	&XCP_CFG_SYSFS_RES_ATTR_PTR(num_shared), NULL
};

static const char *xcp_desc[] = {
	[AMDGPU_SPX_PARTITION_MODE] = "SPX",
	[AMDGPU_DPX_PARTITION_MODE] = "DPX",
	[AMDGPU_TPX_PARTITION_MODE] = "TPX",
	[AMDGPU_QPX_PARTITION_MODE] = "QPX",
	[AMDGPU_CPX_PARTITION_MODE] = "CPX",
};

static const char *nps_desc[] = {
	[UNKNOWN_MEMORY_PARTITION_MODE] = "UNKNOWN",
	[AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
	[AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
	[AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
	[AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
	[AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
	[AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
};

ATTRIBUTE_GROUPS(xcp_cfg_res_sysfs);

#define to_xcp_attr(x) \
	container_of(x, struct amdgpu_xcp_res_sysfs_attribute, attr)
#define to_xcp_res(x) container_of(x, struct amdgpu_xcp_res_details, kobj)

static ssize_t xcp_cfg_res_sysfs_attr_show(struct kobject *kobj,
					   struct attribute *attr, char *buf)
{
	struct amdgpu_xcp_res_sysfs_attribute *attribute;
	struct amdgpu_xcp_res_details *xcp_res;

	attribute = to_xcp_attr(attr);
	xcp_res = to_xcp_res(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(xcp_res, buf);
}

static const struct sysfs_ops xcp_cfg_res_sysfs_ops = {
	.show = xcp_cfg_res_sysfs_attr_show,
};

static const struct kobj_type xcp_cfg_res_sysfs_ktype = {
	.sysfs_ops = &xcp_cfg_res_sysfs_ops,
	.default_groups = xcp_cfg_res_sysfs_groups,
};

const char *xcp_res_names[] = {
	[AMDGPU_XCP_RES_XCC] = "xcc",
	[AMDGPU_XCP_RES_DMA] = "dma",
	[AMDGPU_XCP_RES_DEC] = "dec",
	[AMDGPU_XCP_RES_JPEG] = "jpeg",
};

static int amdgpu_xcp_get_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
				   int mode,
				   struct amdgpu_xcp_cfg *xcp_cfg)
{
	if (xcp_mgr->funcs && xcp_mgr->funcs->get_xcp_res_info)
		return xcp_mgr->funcs->get_xcp_res_info(xcp_mgr, mode, xcp_cfg);

	return -EOPNOTSUPP;
}

#define to_xcp_cfg(x) container_of(x, struct amdgpu_xcp_cfg, kobj)
static ssize_t supported_xcp_configs_show(struct kobject *kobj,
					  struct kobj_attribute *attr, char *buf)
{
	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
	struct amdgpu_xcp_mgr *xcp_mgr = xcp_cfg->xcp_mgr;
	int size = 0, mode;
	char *sep = "";

	if (!xcp_mgr || !xcp_mgr->supp_xcp_modes)
		return sysfs_emit(buf, "Not supported\n");

	for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
		size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
		sep = ", ";
	}

	size += sysfs_emit_at(buf, size, "\n");

	return size;
}

static ssize_t supported_nps_configs_show(struct kobject *kobj,
					  struct kobj_attribute *attr, char *buf)
{
	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
	int size = 0, mode;
	char *sep = "";

	if (!xcp_cfg || !xcp_cfg->compatible_nps_modes)
		return sysfs_emit(buf, "Not supported\n");

	for_each_inst(mode, xcp_cfg->compatible_nps_modes) {
		size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
		sep = ", ";
	}

	size += sysfs_emit_at(buf, size, "\n");

	return size;
}

static ssize_t xcp_config_show(struct kobject *kobj,
			       struct kobj_attribute *attr, char *buf)
{
	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);

	return sysfs_emit(buf, "%s\n",
			  amdgpu_gfx_compute_mode_desc(xcp_cfg->mode));
}

static ssize_t xcp_config_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t size)
{
	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
	int mode, r;

	if (!strncasecmp("SPX", buf, strlen("SPX")))
		mode = AMDGPU_SPX_PARTITION_MODE;
	else if (!strncasecmp("DPX", buf, strlen("DPX")))
		mode = AMDGPU_DPX_PARTITION_MODE;
	else if (!strncasecmp("TPX", buf, strlen("TPX")))
		mode = AMDGPU_TPX_PARTITION_MODE;
	else if (!strncasecmp("QPX", buf, strlen("QPX")))
		mode = AMDGPU_QPX_PARTITION_MODE;
	else if (!strncasecmp("CPX", buf, strlen("CPX")))
		mode = AMDGPU_CPX_PARTITION_MODE;
	else
		return -EINVAL;

	r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);

	if (r)
		return r;

	xcp_cfg->mode = mode;
	return size;
}

static struct kobj_attribute xcp_cfg_sysfs_mode =
	__ATTR_RW_MODE(xcp_config, 0644);

static void xcp_cfg_sysfs_release(struct kobject *kobj)
{
	struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);

	kfree(xcp_cfg);
}

static const struct kobj_type xcp_cfg_sysfs_ktype = {
	.release = xcp_cfg_sysfs_release,
	.sysfs_ops = &kobj_sysfs_ops,
};

static struct kobj_attribute supp_part_sysfs_mode =
	__ATTR_RO(supported_xcp_configs);

static struct kobj_attribute supp_nps_sysfs_mode =
	__ATTR_RO(supported_nps_configs);

static const struct attribute *xcp_attrs[] = {
	&supp_part_sysfs_mode.attr,
	&xcp_cfg_sysfs_mode.attr,
	NULL,
};

void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev)
{
	struct amdgpu_xcp_res_details *xcp_res;
	struct amdgpu_xcp_cfg *xcp_cfg;
	int i, r, j, rid, mode;

	if (!adev->xcp_mgr)
		return;

	xcp_cfg = kzalloc(sizeof(*xcp_cfg), GFP_KERNEL);
	if (!xcp_cfg)
		return;
	xcp_cfg->xcp_mgr = adev->xcp_mgr;

	r = kobject_init_and_add(&xcp_cfg->kobj, &xcp_cfg_sysfs_ktype,
				 &adev->dev->kobj, "compute_partition_config");
	if (r)
		goto err1;

	r = sysfs_create_files(&xcp_cfg->kobj, xcp_attrs);
	if (r)
		goto err1;

	if (adev->gmc.supported_nps_modes != 0) {
		r = sysfs_create_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
		if (r) {
			sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
			goto err1;
		}
	}

	mode = (xcp_cfg->xcp_mgr->mode ==
		AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) ?
		       AMDGPU_SPX_PARTITION_MODE :
		       xcp_cfg->xcp_mgr->mode;
	r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
	if (r) {
		sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
		sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
		goto err1;
	}

	xcp_cfg->mode = mode;
	for (i = 0; i < xcp_cfg->num_res; i++) {
		xcp_res = &xcp_cfg->xcp_res[i];
		rid = xcp_res->id;
		r = kobject_init_and_add(&xcp_res->kobj,
					 &xcp_cfg_res_sysfs_ktype,
					 &xcp_cfg->kobj, "%s",
					 xcp_res_names[rid]);
		if (r)
			goto err;
	}

	adev->xcp_mgr->xcp_cfg = xcp_cfg;
	return;
err:
	/* Release the kobjects added so far; indexing with the loop
	 * variable j (the original indexed with i, repeatedly putting
	 * the same entry and leaking the earlier ones). */
	for (j = 0; j < i; j++) {
		xcp_res = &xcp_cfg->xcp_res[j];
		kobject_put(&xcp_res->kobj);
	}

	sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
	sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
err1:
	kobject_put(&xcp_cfg->kobj);
}

void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev)
{
	struct amdgpu_xcp_res_details *xcp_res;
	struct amdgpu_xcp_cfg *xcp_cfg;
	int i;

	if (!adev->xcp_mgr || !adev->xcp_mgr->xcp_cfg)
		return;

	xcp_cfg = adev->xcp_mgr->xcp_cfg;
	for (i = 0; i < xcp_cfg->num_res; i++) {
		xcp_res = &xcp_cfg->xcp_res[i];
		kobject_put(&xcp_res->kobj);
	}

	sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
	sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
	kobject_put(&xcp_cfg->kobj);
}