mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMs.  Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
  edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
 a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
			
			
This commit is contained in:
		
							parent
							
								
									bb89e7141a
								
							
						
					
					
						commit
						7d375bffa5
					
				| @ -34,7 +34,7 @@ static int probed; | ||||
| /*
 | ||||
|  * Alter this version for the module when modifications are made | ||||
|  */ | ||||
| #define SBRIDGE_REVISION    " Ver: 1.1.0 " | ||||
| #define SBRIDGE_REVISION    " Ver: 1.1.1 " | ||||
| #define EDAC_MOD_STR      "sbridge_edac" | ||||
| 
 | ||||
| /*
 | ||||
| @ -254,7 +254,7 @@ static const u32 correrrthrsld[] = { | ||||
|  * sbridge structs | ||||
|  */ | ||||
| 
 | ||||
| #define NUM_CHANNELS		4 | ||||
| #define NUM_CHANNELS		8	/* 2MC per socket, four chan per MC */ | ||||
| #define MAX_DIMMS		3	/* Max DIMMS per channel */ | ||||
| #define CHANNEL_UNSPECIFIED	0xf	/* Intel IA32 SDM 15-14 */ | ||||
| 
 | ||||
| @ -393,6 +393,8 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = { | ||||
| #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS		0x0e79 | ||||
| #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0	0x0e6a | ||||
| #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1	0x0e6b | ||||
| #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2	0x0e6c | ||||
| #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3	0x0e6d | ||||
| 
 | ||||
| static const struct pci_id_descr pci_dev_descr_ibridge[] = { | ||||
| 		/* Processor Home Agent */ | ||||
| @ -421,6 +423,8 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = { | ||||
| #endif | ||||
| 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1)	}, | ||||
| 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1)	}, | ||||
| 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1)	}, | ||||
| 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1)	}, | ||||
| 
 | ||||
| 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1)	}, | ||||
| 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1)	}, | ||||
| @ -909,6 +913,8 @@ static int get_dimm_config(struct mem_ctl_info *mci) | ||||
| 	for (i = 0; i < NUM_CHANNELS; i++) { | ||||
| 		u32 mtr; | ||||
| 
 | ||||
| 		if (!pvt->pci_tad[i]) | ||||
| 			continue; | ||||
| 		for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { | ||||
| 			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, | ||||
| 				       i, j, 0); | ||||
| @ -925,8 +931,8 @@ static int get_dimm_config(struct mem_ctl_info *mci) | ||||
| 				size = ((u64)rows * cols * banks * ranks) >> (20 - 3); | ||||
| 				npages = MiB_TO_PAGES(size); | ||||
| 
 | ||||
| 				edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", | ||||
| 					 pvt->sbridge_dev->mc, i, j, | ||||
| 				edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", | ||||
| 					 pvt->sbridge_dev->mc, i/4, i%4, j, | ||||
| 					 size, npages, | ||||
| 					 banks, ranks, rows, cols); | ||||
| 
 | ||||
| @ -946,8 +952,8 @@ static int get_dimm_config(struct mem_ctl_info *mci) | ||||
| 				dimm->mtype = mtype; | ||||
| 				dimm->edac_mode = mode; | ||||
| 				snprintf(dimm->label, sizeof(dimm->label), | ||||
| 					 "CPU_SrcID#%u_Channel#%u_DIMM#%u", | ||||
| 					 pvt->sbridge_dev->source_id, i, j); | ||||
| 					 "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u", | ||||
| 					 pvt->sbridge_dev->source_id, i/4, i%4, j); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| @ -1128,7 +1134,7 @@ static struct mem_ctl_info *get_mci_for_node_id(u8 node_id) | ||||
| 
 | ||||
| static int get_memory_error_data(struct mem_ctl_info *mci, | ||||
| 				 u64 addr, | ||||
| 				 u8 *socket, | ||||
| 				 u8 *socket, u8 *ha, | ||||
| 				 long *channel_mask, | ||||
| 				 u8 *rank, | ||||
| 				 char **area_type, char *msg) | ||||
| @ -1141,7 +1147,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, | ||||
| 	int			interleave_mode, shiftup = 0; | ||||
| 	unsigned		sad_interleave[pvt->info.max_interleave]; | ||||
| 	u32			reg, dram_rule; | ||||
| 	u8			ch_way, sck_way, pkg, sad_ha = 0; | ||||
| 	u8			ch_way, sck_way, pkg, sad_ha = 0, ch_add = 0; | ||||
| 	u32			tad_offset; | ||||
| 	u32			rir_way; | ||||
| 	u32			mb, gb; | ||||
| @ -1254,6 +1260,8 @@ static int get_memory_error_data(struct mem_ctl_info *mci, | ||||
| 		pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); | ||||
| 		*socket = sad_pkg_socket(pkg); | ||||
| 		sad_ha = sad_pkg_ha(pkg); | ||||
| 		if (sad_ha) | ||||
| 			ch_add = 4; | ||||
| 
 | ||||
| 		if (a7mode) { | ||||
| 			/* MCChanShiftUpEnable */ | ||||
| @ -1270,10 +1278,14 @@ static int get_memory_error_data(struct mem_ctl_info *mci, | ||||
| 		pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); | ||||
| 		*socket = sad_pkg_socket(pkg); | ||||
| 		sad_ha = sad_pkg_ha(pkg); | ||||
| 		if (sad_ha) | ||||
| 			ch_add = 4; | ||||
| 		edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n", | ||||
| 			 idx, *socket, sad_ha); | ||||
| 	} | ||||
| 
 | ||||
| 	*ha = sad_ha; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Move to the proper node structure, in order to access the | ||||
| 	 * right PCI registers | ||||
| @ -1346,7 +1358,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, | ||||
| 	} | ||||
| 	*channel_mask = 1 << base_ch; | ||||
| 
 | ||||
| 	pci_read_config_dword(pvt->pci_tad[base_ch], | ||||
| 	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], | ||||
| 				tad_ch_nilv_offset[n_tads], | ||||
| 				&tad_offset); | ||||
| 
 | ||||
| @ -1405,7 +1417,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, | ||||
| 	 * Step 3) Decode rank | ||||
| 	 */ | ||||
| 	for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) { | ||||
| 		pci_read_config_dword(pvt->pci_tad[base_ch], | ||||
| 		pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], | ||||
| 				      rir_way_limit[n_rir], | ||||
| 				      &reg); | ||||
| 
 | ||||
| @ -1435,7 +1447,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, | ||||
| 		idx = (ch_addr >> 13);	/* FIXME: Datasheet says to shift by 15 */ | ||||
| 	idx %= 1 << rir_way; | ||||
| 
 | ||||
| 	pci_read_config_dword(pvt->pci_tad[base_ch], | ||||
| 	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], | ||||
| 			      rir_offset[n_rir][idx], | ||||
| 			      &reg); | ||||
| 	*rank = RIR_RNK_TGT(reg); | ||||
| @ -1681,16 +1693,9 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 				 struct sbridge_dev *sbridge_dev) | ||||
| { | ||||
| 	struct sbridge_pvt *pvt = mci->pvt_info; | ||||
| 	struct pci_dev *pdev, *tmp; | ||||
| 	struct pci_dev *pdev; | ||||
| 	u8 saw_chan_mask = 0; | ||||
| 	int i; | ||||
| 	bool mode_2ha = false; | ||||
| 
 | ||||
| 	tmp = pci_get_device(PCI_VENDOR_ID_INTEL, | ||||
| 			     PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, NULL); | ||||
| 	if (tmp) { | ||||
| 		mode_2ha = true; | ||||
| 		pci_dev_put(tmp); | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = 0; i < sbridge_dev->n_devs; i++) { | ||||
| 		pdev = sbridge_dev->pdev[i]; | ||||
| @ -1706,26 +1711,21 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS: | ||||
| 			pvt->pci_ras = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2: | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3: | ||||
| 			/* if we have 2 HAs active, channels 2 and 3
 | ||||
| 			 * are in other device */ | ||||
| 			if (mode_2ha) | ||||
| 				break; | ||||
| 			/* fall through */ | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0: | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1: | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2: | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3: | ||||
| 		{ | ||||
| 			int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0; | ||||
| 			pvt->pci_tad[id] = pdev; | ||||
| 			saw_chan_mask |= 1 << id; | ||||
| 		} | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0: | ||||
| 			pvt->pci_ddrio = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0: | ||||
| 			if (!mode_2ha) | ||||
| 				pvt->pci_ddrio = pdev; | ||||
| 			pvt->pci_ddrio = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_SAD: | ||||
| 			pvt->pci_sad0 = pdev; | ||||
| @ -1741,13 +1741,12 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0: | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1: | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2: | ||||
| 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3: | ||||
| 		{ | ||||
| 			int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 2; | ||||
| 
 | ||||
| 			/* we shouldn't have this device if we have just one
 | ||||
| 			 * HA present */ | ||||
| 			WARN_ON(!mode_2ha); | ||||
| 			int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 4; | ||||
| 			pvt->pci_tad[id] = pdev; | ||||
| 			saw_chan_mask |= 1 << id; | ||||
| 		} | ||||
| 			break; | ||||
| 		default: | ||||
| @ -1766,10 +1765,10 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 	    !pvt->pci_ta) | ||||
| 		goto enodev; | ||||
| 
 | ||||
| 	for (i = 0; i < NUM_CHANNELS; i++) { | ||||
| 		if (!pvt->pci_tad[i]) | ||||
| 			goto enodev; | ||||
| 	} | ||||
| 	if (saw_chan_mask != 0x0f && /* -EN */ | ||||
| 	    saw_chan_mask != 0x33 && /* -EP */ | ||||
| 	    saw_chan_mask != 0xff)   /* -EX */ | ||||
| 		goto enodev; | ||||
| 	return 0; | ||||
| 
 | ||||
| enodev: | ||||
| @ -1787,16 +1786,9 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 				 struct sbridge_dev *sbridge_dev) | ||||
| { | ||||
| 	struct sbridge_pvt *pvt = mci->pvt_info; | ||||
| 	struct pci_dev *pdev, *tmp; | ||||
| 	struct pci_dev *pdev; | ||||
| 	u8 saw_chan_mask = 0; | ||||
| 	int i; | ||||
| 	bool mode_2ha = false; | ||||
| 
 | ||||
| 	tmp = pci_get_device(PCI_VENDOR_ID_INTEL, | ||||
| 			     PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, NULL); | ||||
| 	if (tmp) { | ||||
| 		mode_2ha = true; | ||||
| 		pci_dev_put(tmp); | ||||
| 	} | ||||
| 
 | ||||
| 	/* there's only one device per system; not tied to any bus */ | ||||
| 	if (pvt->info.pci_vtd == NULL) | ||||
| @ -1827,18 +1819,26 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 			pvt->pci_ras = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0: | ||||
| 			pvt->pci_tad[0] = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1: | ||||
| 			pvt->pci_tad[1] = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2: | ||||
| 			if (!mode_2ha) | ||||
| 				pvt->pci_tad[2] = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3: | ||||
| 			if (!mode_2ha) | ||||
| 				pvt->pci_tad[3] = pdev; | ||||
| 		{ | ||||
| 			int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0; | ||||
| 
 | ||||
| 			pvt->pci_tad[id] = pdev; | ||||
| 			saw_chan_mask |= 1 << id; | ||||
| 		} | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0: | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1: | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2: | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3: | ||||
| 		{ | ||||
| 			int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 + 4; | ||||
| 
 | ||||
| 			pvt->pci_tad[id] = pdev; | ||||
| 			saw_chan_mask |= 1 << id; | ||||
| 		} | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0: | ||||
| 			pvt->pci_ddrio = pdev; | ||||
| @ -1849,14 +1849,6 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA: | ||||
| 			pvt->pci_ha1_ta = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0: | ||||
| 			if (mode_2ha) | ||||
| 				pvt->pci_tad[2] = pdev; | ||||
| 			break; | ||||
| 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1: | ||||
| 			if (mode_2ha) | ||||
| 				pvt->pci_tad[3] = pdev; | ||||
| 			break; | ||||
| 		default: | ||||
| 			break; | ||||
| 		} | ||||
| @ -1872,10 +1864,10 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci, | ||||
| 	    !pvt->pci_ras  || !pvt->pci_ta || !pvt->info.pci_vtd) | ||||
| 		goto enodev; | ||||
| 
 | ||||
| 	for (i = 0; i < NUM_CHANNELS; i++) { | ||||
| 		if (!pvt->pci_tad[i]) | ||||
| 			goto enodev; | ||||
| 	} | ||||
| 	if (saw_chan_mask != 0x0f && /* -EN */ | ||||
| 	    saw_chan_mask != 0x33 && /* -EP */ | ||||
| 	    saw_chan_mask != 0xff)   /* -EX */ | ||||
| 		goto enodev; | ||||
| 	return 0; | ||||
| 
 | ||||
| enodev: | ||||
| @ -1986,7 +1978,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, | ||||
| 	u32 channel = GET_BITFIELD(m->status, 0, 3); | ||||
| 	u32 optypenum = GET_BITFIELD(m->status, 4, 6); | ||||
| 	long channel_mask, first_channel; | ||||
| 	u8  rank, socket; | ||||
| 	u8  rank, socket, ha; | ||||
| 	int rc, dimm; | ||||
| 	char *area_type = NULL; | ||||
| 
 | ||||
| @ -2048,7 +2040,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, | ||||
| 	if (!GET_BITFIELD(m->status, 58, 58)) | ||||
| 		return; | ||||
| 
 | ||||
| 	rc = get_memory_error_data(mci, m->addr, &socket, | ||||
| 	rc = get_memory_error_data(mci, m->addr, &socket, &ha, | ||||
| 				   &channel_mask, &rank, &area_type, msg); | ||||
| 	if (rc < 0) | ||||
| 		goto err_parsing; | ||||
| @ -2080,12 +2072,12 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, | ||||
| 		channel = first_channel; | ||||
| 
 | ||||
| 	snprintf(msg, sizeof(msg), | ||||
| 		 "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d", | ||||
| 		 "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d", | ||||
| 		 overflow ? " OVERFLOW" : "", | ||||
| 		 (uncorrected_error && recoverable) ? " recoverable" : "", | ||||
| 		 area_type, | ||||
| 		 mscod, errcode, | ||||
| 		 socket, | ||||
| 		 socket, ha, | ||||
| 		 channel_mask, | ||||
| 		 rank); | ||||
| 
 | ||||
| @ -2099,7 +2091,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, | ||||
| 	/* Call the helper to output message */ | ||||
| 	edac_mc_handle_error(tp_event, mci, core_err_cnt, | ||||
| 			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, | ||||
| 			     channel, dimm, -1, | ||||
| 			     4*ha+channel, dimm, -1, | ||||
| 			     optype, msg); | ||||
| 	return; | ||||
| err_parsing: | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Tony Luck
						Tony Luck