2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

drm/amdkfd: add kfd_device_info_init function

Initializes kfd->device_info given either asic_type (enum) if GFX
version is less than GFX9, or GC IP version if GFX9 or greater. Also
takes in vf and the target compiler gfx version. Uses SDMA version to
determine num_sdma_queues_per_engine.

Convert device_info to a non-pointer member of kfd, change references
accordingly.

Change unsupported asic condition to only probe f2g, move device_info
initialization post-switch.

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Graham Sider 2021-11-17 17:32:37 -05:00 committed by Alex Deucher
parent b7675b7bbc
commit f0dc99a6f7
11 changed files with 163 additions and 141 deletions

View File

@ -511,193 +511,215 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
static void kfd_device_info_init(struct kfd_dev *kfd,
bool vf, uint32_t gfx_target_version)
{
uint32_t gc_version = KFD_GC_VERSION(kfd);
uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
uint32_t asic_type = kfd->adev->asic_type;
kfd->device_info.max_pasid_bits = 16;
kfd->device_info.max_no_of_hqd = 24;
kfd->device_info.num_of_watch_points = 4;
kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
kfd->device_info.gfx_target_version = gfx_target_version;
if (KFD_IS_SOC15(kfd)) {
kfd->device_info.doorbell_size = 8;
kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
kfd->device_info.supports_cwsr = true;
if ((sdma_version >= IP_VERSION(4, 0, 0) &&
sdma_version <= IP_VERSION(4, 2, 0)) ||
sdma_version == IP_VERSION(5, 2, 1) ||
sdma_version == IP_VERSION(5, 2, 3))
kfd->device_info.num_sdma_queues_per_engine = 2;
else
kfd->device_info.num_sdma_queues_per_engine = 8;
/* Raven */
if (gc_version == IP_VERSION(9, 1, 0) ||
gc_version == IP_VERSION(9, 2, 2))
kfd->device_info.needs_iommu_device = true;
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
if (gc_version >= IP_VERSION(10, 3, 0))
kfd->device_info.no_atomic_fw_version = 145;
else if (gc_version >= IP_VERSION(10, 1, 1))
kfd->device_info.no_atomic_fw_version = 92;
/* Navi1x+ */
if (gc_version >= IP_VERSION(10, 1, 1))
kfd->device_info.needs_pci_atomics = true;
}
} else {
kfd->device_info.doorbell_size = 4;
kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
kfd->device_info.num_sdma_queues_per_engine = 2;
if (asic_type != CHIP_KAVERI &&
asic_type != CHIP_HAWAII &&
asic_type != CHIP_TONGA)
kfd->device_info.supports_cwsr = true;
if (asic_type == CHIP_KAVERI ||
asic_type == CHIP_CARRIZO)
kfd->device_info.needs_iommu_device = true;
if (asic_type != CHIP_HAWAII && !vf)
kfd->device_info.needs_pci_atomics = true;
}
}
struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
struct kfd_dev *kfd;
const struct kfd_device_info *device_info;
const struct kfd2kgd_calls *f2g;
struct kfd_dev *kfd = NULL;
const struct kfd2kgd_calls *f2g = NULL;
struct pci_dev *pdev = adev->pdev;
uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
if (vf)
device_info = NULL;
else
device_info = &kaveri_device_info;
gfx_target_version = 70000;
if (!vf)
f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_CARRIZO:
if (vf)
device_info = NULL;
else
device_info = &carrizo_device_info;
gfx_target_version = 80001;
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
if (vf)
device_info = NULL;
else
device_info = &hawaii_device_info;
gfx_target_version = 70001;
if (!vf)
f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_TONGA:
if (vf)
device_info = NULL;
else
device_info = &tonga_device_info;
gfx_target_version = 80002;
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_FIJI:
if (vf)
device_info = &fiji_vf_device_info;
else
device_info = &fiji_device_info;
gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS10:
if (vf)
device_info = &polaris10_vf_device_info;
else
device_info = &polaris10_device_info;
gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS11:
if (vf)
device_info = NULL;
else
device_info = &polaris11_device_info;
gfx_target_version = 80003;
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS12:
if (vf)
device_info = NULL;
else
device_info = &polaris12_device_info;
gfx_target_version = 80003;
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_VEGAM:
if (vf)
device_info = NULL;
else
device_info = &vegam_device_info;
gfx_target_version = 80003;
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
default:
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
if (vf)
device_info = &vega10_vf_device_info;
else
device_info = &vega10_device_info;
gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
#ifdef KFD_SUPPORT_IOMMU_V2
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
if (vf)
device_info = NULL;
else
device_info = &raven_device_info;
gfx_target_version = 90002;
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
#endif
case IP_VERSION(9, 2, 1):
if (vf)
device_info = NULL;
else
device_info = &vega12_device_info;
gfx_target_version = 90004;
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
case IP_VERSION(9, 3, 0):
if (vf)
device_info = NULL;
else
device_info = &renoir_device_info;
gfx_target_version = 90012;
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
case IP_VERSION(9, 4, 0):
if (vf)
device_info = NULL;
else
device_info = &vega20_device_info;
gfx_target_version = 90006;
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
case IP_VERSION(9, 4, 1):
device_info = &arcturus_device_info;
gfx_target_version = 90008;
f2g = &arcturus_kfd2kgd;
break;
case IP_VERSION(9, 4, 2):
device_info = &aldebaran_device_info;
gfx_target_version = 90010;
f2g = &aldebaran_kfd2kgd;
break;
case IP_VERSION(10, 1, 10):
if (vf)
device_info = NULL;
else
device_info = &navi10_device_info;
gfx_target_version = 100100;
if (!vf)
f2g = &gfx_v10_kfd2kgd;
break;
case IP_VERSION(10, 1, 2):
device_info = &navi12_device_info;
gfx_target_version = 100101;
f2g = &gfx_v10_kfd2kgd;
break;
case IP_VERSION(10, 1, 1):
if (vf)
device_info = NULL;
else
device_info = &navi14_device_info;
gfx_target_version = 100102;
if (!vf)
f2g = &gfx_v10_kfd2kgd;
break;
case IP_VERSION(10, 1, 3):
if (vf)
device_info = NULL;
else
device_info = &cyan_skillfish_device_info;
gfx_target_version = 100103;
if (!vf)
f2g = &gfx_v10_kfd2kgd;
break;
case IP_VERSION(10, 3, 0):
device_info = &sienna_cichlid_device_info;
gfx_target_version = 100300;
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 2):
device_info = &navy_flounder_device_info;
gfx_target_version = 100301;
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 1):
if (vf)
device_info = NULL;
else
device_info = &vangogh_device_info;
gfx_target_version = 100303;
if (!vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 4):
device_info = &dimgrey_cavefish_device_info;
gfx_target_version = 100302;
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 5):
device_info = &beige_goby_device_info;
gfx_target_version = 100304;
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 3):
if (vf)
device_info = NULL;
else
device_info = &yellow_carp_device_info;
gfx_target_version = 100305;
if (!vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
default:
return NULL;
break;
}
break;
}
if (!device_info || !f2g) {
if (!f2g) {
if (adev->ip_versions[GC_HWIP][0])
dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
@ -712,7 +734,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
return NULL;
kfd->adev = adev;
kfd->device_info = device_info;
kfd_device_info_init(kfd, vf, gfx_target_version);
kfd->pdev = pdev;
kfd->init_complete = false;
kfd->kfd2kgd = f2g;
@ -731,7 +753,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
if (cwsr_enable && kfd->device_info->supports_cwsr) {
if (cwsr_enable && kfd->device_info.supports_cwsr) {
if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
@ -815,14 +837,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
*/
kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
if (!kfd->pci_atomic_requested &&
kfd->device_info->needs_pci_atomics &&
(!kfd->device_info->no_atomic_fw_version ||
kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
kfd->device_info.needs_pci_atomics &&
(!kfd->device_info.no_atomic_fw_version ||
kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics %d<%d\n",
kfd->pdev->vendor, kfd->pdev->device,
kfd->mec_fw_version,
kfd->device_info->no_atomic_fw_version);
kfd->device_info.no_atomic_fw_version);
return false;
}
@ -839,7 +861,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
kfd->device_info.mqd_size_aligned;
/*
* calculate max size of runlist packet.
@ -1114,7 +1136,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
if (!kfd->init_complete)
return;
if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
dev_err_once(kfd_device, "Ring entry too small\n");
return;
}

View File

@ -108,13 +108,13 @@ static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
return kfd_get_num_sdma_engines(dqm->dev) *
dqm->dev->device_info->num_sdma_queues_per_engine;
dqm->dev->device_info.num_sdma_queues_per_engine;
}
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
dqm->dev->device_info->num_sdma_queues_per_engine;
dqm->dev->device_info.num_sdma_queues_per_engine;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
@ -1838,7 +1838,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
get_num_all_sdma_engines(dqm) *
dev->device_info->num_sdma_queues_per_engine +
dev->device_info.num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
@ -2082,7 +2082,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
for (queue = 0;
queue < dqm->dev->device_info->num_sdma_queues_per_engine;
queue < dqm->dev->device_info.num_sdma_queues_per_engine;
queue++) {
r = dqm->dev->kfd2kgd->hqd_sdma_dump(
dqm->dev->adev, pipe, queue, &dump, &n_regs);

View File

@ -48,7 +48,7 @@
/* # of doorbell bytes allocated for each process. */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
return roundup(kfd->device_info->doorbell_size *
return roundup(kfd->device_info.doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE);
}
@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
inx *= kfd->device_info->doorbell_size / sizeof(u32);
inx *= kfd->device_info.doorbell_size / sizeof(u32);
/*
* Calculating the kernel doorbell offset using the first
@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
* sizeof(u32) / kfd->device_info->doorbell_size;
* sizeof(u32) / kfd->device_info.doorbell_size;
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index);
@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
return kfd->doorbell_base_dw_offset +
pdd->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)

View File

@ -135,7 +135,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
*patched_flag = true;
memcpy(patched_ihre, ih_ring_entry,
dev->device_info->ih_ring_entry_size);
dev->device_info.ih_ring_entry_size);
pasid = dev->dqm->vmid_pasid[vmid];

View File

@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
int r;
r = kfifo_alloc(&kfd->ih_fifo,
KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size,
GFP_KERNEL);
if (r) {
dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
int count;
count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
kfd->device_info->ih_ring_entry_size);
if (count != kfd->device_info->ih_ring_entry_size) {
kfd->device_info.ih_ring_entry_size);
if (count != kfd->device_info.ih_ring_entry_size) {
dev_err_ratelimited(kfd_chardev(),
"Interrupt ring overflow, dropping interrupt %d\n",
count);
@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
int count;
count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
kfd->device_info->ih_ring_entry_size);
kfd->device_info.ih_ring_entry_size);
WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
WARN_ON(count && count != kfd->device_info.ih_ring_entry_size);
return count == kfd->device_info->ih_ring_entry_size;
return count == kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work)
interrupt_work);
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
dev_err_once(kfd_chardev(), "Ring entry too small\n");
return;
}
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
dev->device_info->event_interrupt_class->interrupt_wq(dev,
dev->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
}
@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned int wanted = 0;
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev,
ih_ring_entry, patched_ihre, flag);
return wanted != 0;

View File

@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
}
pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
(unsigned int)(1 << kfd->device_info.max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {

View File

@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@ -297,7 +297,7 @@ void kq_submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
if (kq->dev->device_info->doorbell_size == 8) {
if (kq->dev->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
@ -310,7 +310,7 @@ void kq_submit_packet(struct kernel_queue *kq)
void kq_rollback_packet(struct kernel_queue *kq)
{
if (kq->dev->device_info->doorbell_size == 8) {
if (kq->dev->device_info.doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
kq->pending_wptr = *kq->wptr_kernel %
(kq->queue->properties.queue_size / 4);

View File

@ -71,7 +71,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
return NULL;
offset = (q->sdma_engine_id *
dev->device_info->num_sdma_queues_per_engine +
dev->device_info.num_sdma_queues_per_engine +
q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;

View File

@ -230,7 +230,7 @@ struct kfd_vmid_info {
struct kfd_dev {
struct amdgpu_device *adev;
const struct kfd_device_info *device_info;
struct kfd_device_info device_info;
struct pci_dev *pdev;
struct drm_device *ddev;

View File

@ -219,7 +219,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* Hence we also check the type as well
*/
if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
max_queues = dev->device_info->max_no_of_hqd/2;
max_queues = dev->device_info.max_no_of_hqd/2;
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;

View File

@ -503,7 +503,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (dev->gpu) {
log_max_watch_addr =
__ilog2_u32(dev->gpu->device_info->num_of_watch_points);
__ilog2_u32(dev->gpu->device_info.num_of_watch_points);
if (log_max_watch_addr) {
dev->node_props.capability |=
@ -1382,7 +1382,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.simd_arrays_per_engine =
cu_info.num_shader_arrays_per_engine;
dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version;
dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version;
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
dev->node_props.capability |=
@ -1402,7 +1402,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.num_sdma_xgmi_engines =
kfd_get_num_xgmi_sdma_engines(gpu);
dev->node_props.num_sdma_queues_per_engine =
gpu->device_info->num_sdma_queues_per_engine;
gpu->device_info.num_sdma_queues_per_engine;
dev->node_props.num_gws = (dev->gpu->gws &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
dev->gpu->adev->gds.gws_size : 0;
@ -1578,7 +1578,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
gpu->use_iommu_v2 = false;
if (!gpu->device_info->needs_iommu_device)
if (!gpu->device_info.needs_iommu_device)
return;
down_read(&topology_lock);