mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 e46e31a369
			
		
	
	
		e46e31a369
		
	
	
	
	
		
			
			When using the Promise TX2+ SATA controller on PA-RISC, the system often crashes with kernel panic, for example just writing data with the dd utility will make it crash. Kernel panic - not syncing: drivers/parisc/sba_iommu.c: I/O MMU @ 000000000000a000 is out of mapping resources CPU: 0 PID: 18442 Comm: mkspadfs Not tainted 4.4.0-rc2 #2 Backtrace: [<000000004021497c>] show_stack+0x14/0x20 [<0000000040410bf0>] dump_stack+0x88/0x100 [<000000004023978c>] panic+0x124/0x360 [<0000000040452c18>] sba_alloc_range+0x698/0x6a0 [<0000000040453150>] sba_map_sg+0x260/0x5b8 [<000000000c18dbb4>] ata_qc_issue+0x264/0x4a8 [libata] [<000000000c19535c>] ata_scsi_translate+0xe4/0x220 [libata] [<000000000c19a93c>] ata_scsi_queuecmd+0xbc/0x320 [libata] [<0000000040499bbc>] scsi_dispatch_cmd+0xfc/0x130 [<000000004049da34>] scsi_request_fn+0x6e4/0x970 [<00000000403e95a8>] __blk_run_queue+0x40/0x60 [<00000000403e9d8c>] blk_run_queue+0x3c/0x68 [<000000004049a534>] scsi_run_queue+0x2a4/0x360 [<000000004049be68>] scsi_end_request+0x1a8/0x238 [<000000004049de84>] scsi_io_completion+0xfc/0x688 [<0000000040493c74>] scsi_finish_command+0x17c/0x1d0 The cause of the crash is not exhaustion of the IOMMU space, there is plenty of free pages. The function sba_alloc_range is called with size 0x11000, thus the pages_needed variable is 0x11. The function sba_search_bitmap is called with bits_wanted 0x11 and boundary size is 0x10 (because dma_get_seg_boundary(dev) returns 0xffff). The function sba_search_bitmap attempts to allocate 17 pages that must not cross 16-page boundary - it can't satisfy this requirement (iommu_is_span_boundary always returns true) and fails even if there are many free entries in the IOMMU space. How did it happen that we try to allocate 17 pages that don't cross 16-page boundary? The cause is in the function iommu_coalesce_chunks. This function tries to coalesce adjacent entries in the scatterlist. 
The function does several checks if it may coalesce one entry with the next, one of those checks is this: if (startsg->length + dma_len > max_seg_size) break; When it finishes coalescing adjacent entries, it allocates the mapping: sg_dma_len(contig_sg) = dma_len; dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); sg_dma_address(contig_sg) = PIDE_FLAG | (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT) | dma_offset; It is possible that (startsg->length + dma_len > max_seg_size) is false (we are just near the 0x10000 max_seg_size boundary), so the function decides to coalesce this entry with the next entry. When the coalescing succeeds, the function performs dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); And now, because of non-zero dma_offset, dma_len is greater than 0x10000. iommu_alloc_range (a pointer to sba_alloc_range) is called and it attempts to allocate 17 pages for a device that must not cross a 16-page boundary. To fix the bug, we must make sure that dma_len after addition of dma_offset and alignment doesn't cross the segment boundary. I.e. change if (startsg->length + dma_len > max_seg_size) break; to if (ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > max_seg_size) break; This patch makes this change (it precalculates max_seg_boundary at the beginning of the function iommu_coalesce_chunks). I also added a check that the mapping length doesn't exceed dma_get_seg_boundary(dev) (it is not needed for Promise TX2+ SATA, but it may be needed for other devices that have dma_get_seg_boundary lower than dma_get_max_seg_size). Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Helge Deller <deller@gmx.de>
		
			
				
	
	
		
			182 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			182 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #include <linux/prefetch.h>
 | |
| 
 | |
| /**
 | |
|  * iommu_fill_pdir - Insert coalesced scatter/gather chunks into the I/O Pdir.
 | |
|  * @ioc: The I/O Controller.
 | |
|  * @startsg: The scatter/gather list of coalesced chunks.
 | |
|  * @nents: The number of entries in the scatter/gather list.
 | |
|  * @hint: The DMA Hint.
 | |
|  *
 | |
|  * This function inserts the coalesced scatter/gather list chunks into the
 | |
|  * I/O Controller's I/O Pdir.
 | |
|  */ 
 | |
| static inline unsigned int
 | |
| iommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, 
 | |
| 		unsigned long hint,
 | |
| 		void (*iommu_io_pdir_entry)(u64 *, space_t, unsigned long,
 | |
| 					    unsigned long))
 | |
| {
 | |
| 	struct scatterlist *dma_sg = startsg;	/* pointer to current DMA */
 | |
| 	unsigned int n_mappings = 0;
 | |
| 	unsigned long dma_offset = 0, dma_len = 0;
 | |
| 	u64 *pdirp = NULL;
 | |
| 
 | |
| 	/* Horrible hack.  For efficiency's sake, dma_sg starts one 
 | |
| 	 * entry below the true start (it is immediately incremented
 | |
| 	 * in the loop) */
 | |
| 	 dma_sg--;
 | |
| 
 | |
| 	while (nents-- > 0) {
 | |
| 		unsigned long vaddr;
 | |
| 		long size;
 | |
| 
 | |
| 		DBG_RUN_SG(" %d : %08lx/%05x %p/%05x\n", nents,
 | |
| 			   (unsigned long)sg_dma_address(startsg), cnt,
 | |
| 			   sg_virt(startsg), startsg->length
 | |
| 		);
 | |
| 
 | |
| 
 | |
| 		/*
 | |
| 		** Look for the start of a new DMA stream
 | |
| 		*/
 | |
| 		
 | |
| 		if (sg_dma_address(startsg) & PIDE_FLAG) {
 | |
| 			u32 pide = sg_dma_address(startsg) & ~PIDE_FLAG;
 | |
| 
 | |
| 			BUG_ON(pdirp && (dma_len != sg_dma_len(dma_sg)));
 | |
| 
 | |
| 			dma_sg++;
 | |
| 
 | |
| 			dma_len = sg_dma_len(startsg);
 | |
| 			sg_dma_len(startsg) = 0;
 | |
| 			dma_offset = (unsigned long) pide & ~IOVP_MASK;
 | |
| 			n_mappings++;
 | |
| #if defined(ZX1_SUPPORT)
 | |
| 			/* Pluto IOMMU IO Virt Address is not zero based */
 | |
| 			sg_dma_address(dma_sg) = pide | ioc->ibase;
 | |
| #else
 | |
| 			/* SBA, ccio, and dino are zero based.
 | |
| 			 * Trying to save a few CPU cycles for most users.
 | |
| 			 */
 | |
| 			sg_dma_address(dma_sg) = pide;
 | |
| #endif
 | |
| 			pdirp = &(ioc->pdir_base[pide >> IOVP_SHIFT]);
 | |
| 			prefetchw(pdirp);
 | |
| 		}
 | |
| 		
 | |
| 		BUG_ON(pdirp == NULL);
 | |
| 		
 | |
| 		vaddr = (unsigned long)sg_virt(startsg);
 | |
| 		sg_dma_len(dma_sg) += startsg->length;
 | |
| 		size = startsg->length + dma_offset;
 | |
| 		dma_offset = 0;
 | |
| #ifdef IOMMU_MAP_STATS
 | |
| 		ioc->msg_pages += startsg->length >> IOVP_SHIFT;
 | |
| #endif
 | |
| 		do {
 | |
| 			iommu_io_pdir_entry(pdirp, KERNEL_SPACE, 
 | |
| 					    vaddr, hint);
 | |
| 			vaddr += IOVP_SIZE;
 | |
| 			size -= IOVP_SIZE;
 | |
| 			pdirp++;
 | |
| 		} while(unlikely(size > 0));
 | |
| 		startsg++;
 | |
| 	}
 | |
| 	return(n_mappings);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
| ** First pass is to walk the SG list and determine where the breaks are
 | |
| ** in the DMA stream. Allocates PDIR entries but does not fill them.
 | |
| ** Returns the number of DMA chunks.
 | |
| **
 | |
| ** Doing the fill separate from the coalescing/allocation keeps the
 | |
| ** code simpler. Future enhancement could make one pass through
 | |
| ** the sglist do both.
 | |
| */
 | |
| 
 | |
| static inline unsigned int
 | |
| iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
 | |
| 		struct scatterlist *startsg, int nents,
 | |
| 		int (*iommu_alloc_range)(struct ioc *, struct device *, size_t))
 | |
| {
 | |
| 	struct scatterlist *contig_sg;	   /* contig chunk head */
 | |
| 	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
 | |
| 	unsigned int n_mappings = 0;
 | |
| 	unsigned int max_seg_size = min(dma_get_max_seg_size(dev),
 | |
| 					(unsigned)DMA_CHUNK_SIZE);
 | |
| 	unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1;
 | |
| 	if (max_seg_boundary)	/* check if the addition above didn't overflow */
 | |
| 		max_seg_size = min(max_seg_size, max_seg_boundary);
 | |
| 
 | |
| 	while (nents > 0) {
 | |
| 
 | |
| 		/*
 | |
| 		** Prepare for first/next DMA stream
 | |
| 		*/
 | |
| 		contig_sg = startsg;
 | |
| 		dma_len = startsg->length;
 | |
| 		dma_offset = startsg->offset;
 | |
| 
 | |
| 		/* PARANOID: clear entries */
 | |
| 		sg_dma_address(startsg) = 0;
 | |
| 		sg_dma_len(startsg) = 0;
 | |
| 
 | |
| 		/*
 | |
| 		** This loop terminates one iteration "early" since
 | |
| 		** it's always looking one "ahead".
 | |
| 		*/
 | |
| 		while(--nents > 0) {
 | |
| 			unsigned long prev_end, sg_start;
 | |
| 
 | |
| 			prev_end = (unsigned long)sg_virt(startsg) +
 | |
| 							startsg->length;
 | |
| 
 | |
| 			startsg++;
 | |
| 			sg_start = (unsigned long)sg_virt(startsg);
 | |
| 
 | |
| 			/* PARANOID: clear entries */
 | |
| 			sg_dma_address(startsg) = 0;
 | |
| 			sg_dma_len(startsg) = 0;
 | |
| 
 | |
| 			/*
 | |
| 			** First make sure current dma stream won't
 | |
| 			** exceed max_seg_size if we coalesce the
 | |
| 			** next entry.
 | |
| 			*/   
 | |
| 			if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) >
 | |
| 				     max_seg_size))
 | |
| 				break;
 | |
| 
 | |
| 			/*
 | |
| 			* Next see if we can append the next chunk (i.e.
 | |
| 			* it must end on one page and begin on another, or
 | |
| 			* it must start on the same address as the previous
 | |
| 			* entry ended.
 | |
| 			*/
 | |
| 			if (unlikely((prev_end != sg_start) ||
 | |
| 				((prev_end | sg_start) & ~PAGE_MASK)))
 | |
| 				break;
 | |
| 			
 | |
| 			dma_len += startsg->length;
 | |
| 		}
 | |
| 
 | |
| 		/*
 | |
| 		** End of DMA Stream
 | |
| 		** Terminate last VCONTIG block.
 | |
| 		** Allocate space for DMA stream.
 | |
| 		*/
 | |
| 		sg_dma_len(contig_sg) = dma_len;
 | |
| 		dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE);
 | |
| 		sg_dma_address(contig_sg) =
 | |
| 			PIDE_FLAG 
 | |
| 			| (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT)
 | |
| 			| dma_offset;
 | |
| 		n_mappings++;
 | |
| 	}
 | |
| 
 | |
| 	return n_mappings;
 | |
| }
 | |
| 
 |