mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	drm/amdkfd: handle CPU fault on COW mapping
If a CPU page fault occurs on a page whose zone_device_data svm_bo belongs to another process, the page is a COW mapping in the child process and the range has been migrated to VRAM by the parent process. Migrate the parent process's range back to system memory to recover from the CPU page fault. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
		
							parent
							
								
									7a3f8b7c4c
								
							
						
					
					
						commit
						e1f84eef31
					
				| @ -886,7 +886,7 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, | ||||
| static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) | ||||
| { | ||||
| 	unsigned long addr = vmf->address; | ||||
| 	struct vm_area_struct *vma; | ||||
| 	struct svm_range_bo *svm_bo; | ||||
| 	enum svm_work_list_ops op; | ||||
| 	struct svm_range *parent; | ||||
| 	struct svm_range *prange; | ||||
| @ -894,29 +894,42 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) | ||||
| 	struct mm_struct *mm; | ||||
| 	int r = 0; | ||||
| 
 | ||||
| 	vma = vmf->vma; | ||||
| 	mm = vma->vm_mm; | ||||
| 	svm_bo = vmf->page->zone_device_data; | ||||
| 	if (!svm_bo) { | ||||
| 		pr_debug("failed get device page at addr 0x%lx\n", addr); | ||||
| 		return VM_FAULT_SIGBUS; | ||||
| 	} | ||||
| 	if (!mmget_not_zero(svm_bo->eviction_fence->mm)) { | ||||
| 		pr_debug("addr 0x%lx of process mm is detroyed\n", addr); | ||||
| 		return VM_FAULT_SIGBUS; | ||||
| 	} | ||||
| 
 | ||||
| 	p = kfd_lookup_process_by_mm(vma->vm_mm); | ||||
| 	mm = svm_bo->eviction_fence->mm; | ||||
| 	if (mm != vmf->vma->vm_mm) | ||||
| 		pr_debug("addr 0x%lx is COW mapping in child process\n", addr); | ||||
| 
 | ||||
| 	p = kfd_lookup_process_by_mm(mm); | ||||
| 	if (!p) { | ||||
| 		pr_debug("failed find process at fault address 0x%lx\n", addr); | ||||
| 		return VM_FAULT_SIGBUS; | ||||
| 		r = VM_FAULT_SIGBUS; | ||||
| 		goto out_mmput; | ||||
| 	} | ||||
| 	if (READ_ONCE(p->svms.faulting_task) == current) { | ||||
| 		pr_debug("skipping ram migration\n"); | ||||
| 		kfd_unref_process(p); | ||||
| 		return 0; | ||||
| 		r = 0; | ||||
| 		goto out_unref_process; | ||||
| 	} | ||||
| 	addr >>= PAGE_SHIFT; | ||||
| 
 | ||||
| 	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr); | ||||
| 	addr >>= PAGE_SHIFT; | ||||
| 
 | ||||
| 	mutex_lock(&p->svms.lock); | ||||
| 
 | ||||
| 	prange = svm_range_from_addr(&p->svms, addr, &parent); | ||||
| 	if (!prange) { | ||||
| 		pr_debug("cannot find svm range at 0x%lx\n", addr); | ||||
| 		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr); | ||||
| 		r = -EFAULT; | ||||
| 		goto out; | ||||
| 		goto out_unlock_svms; | ||||
| 	} | ||||
| 
 | ||||
| 	mutex_lock(&parent->migrate_mutex); | ||||
| @ -940,8 +953,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) | ||||
| 
 | ||||
| 	r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU); | ||||
| 	if (r) | ||||
| 		pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, | ||||
| 			 prange, prange->start, prange->last); | ||||
| 		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", | ||||
| 			 r, prange->svms, prange, prange->start, prange->last); | ||||
| 
 | ||||
| 	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ | ||||
| 	if (p->xnack_enabled && parent == prange) | ||||
| @ -955,9 +968,12 @@ out_unlock_prange: | ||||
| 	if (prange != parent) | ||||
| 		mutex_unlock(&prange->migrate_mutex); | ||||
| 	mutex_unlock(&parent->migrate_mutex); | ||||
| out: | ||||
| out_unlock_svms: | ||||
| 	mutex_unlock(&p->svms.lock); | ||||
| out_unref_process: | ||||
| 	kfd_unref_process(p); | ||||
| out_mmput: | ||||
| 	mmput(mm); | ||||
| 
 | ||||
| 	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Philip Yang
						Philip Yang