Merge tag 'edac_updates_for_v6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

 - ie31200: Add support for Raptor Lake-S and Alder Lake-S compute dies

 - Rework how per-channel tracking of RRL (Retry Read error Log)
   registers is done in order to support newer hardware with different
   RRL configurations, and refactor that code. Add support for the
   Granite Rapids server

 - i10nm: Explicitly set RRL modes to fix any wrong BIOS programming

 - Properly save and restore Retry Read error Log (RRL) channel
   configuration info in the Intel drivers

 - igen6: Correctly handle the case of fused-off memory controllers on
   Arizona Beach and Amston Lake SoCs before adding support for them

 - The usual set of fixes and cleanups

* tag 'edac_updates_for_v6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/bluefield: Don't use bluefield_edac_readl() result on error
  EDAC/i10nm: Fix the bitwise operation between variables of different sizes
  EDAC/ie31200: Add two Intel SoCs for EDAC support
  EDAC/{skx_common,i10nm}: Add RRL support for Intel Granite Rapids server
  EDAC/{skx_common,i10nm}: Refactor show_retry_rd_err_log()
  EDAC/{skx_common,i10nm}: Refactor enable_retry_rd_err_log()
  EDAC/{skx_common,i10nm}: Structure the per-channel RRL registers
  EDAC/i10nm: Explicitly set the modes of the RRL register sets
  EDAC/{skx_common,i10nm}: Fix the loss of saved RRL for HBM pseudo channel 0
  EDAC/skx_common: Fix general protection fault
  EDAC/igen6: Add Intel Amston Lake SoCs support
  EDAC/igen6: Add Intel Arizona Beach SoCs support
  EDAC/igen6: Skip absent memory controllers
This commit is contained in:
Linus Torvalds
2025-05-27 10:13:06 -07:00
6 changed files with 418 additions and 227 deletions

View File

@@ -199,8 +199,10 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
* error without the detailed information.
*/
err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
if (err)
if (err) {
dev_err(priv->dev, "DRAM syndrom read failed.\n");
return;
}
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
@@ -213,20 +215,26 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
}
err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
if (err)
if (err) {
dev_err(priv->dev, "DRAM additional info read failed.\n");
return;
}
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
if (err)
if (err) {
dev_err(priv->dev, "Error addr 0 read failed.\n");
return;
}
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
if (err)
if (err) {
dev_err(priv->dev, "Error addr 1 read failed.\n");
return;
}
ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
@@ -250,8 +258,10 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
return;
err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
if (err)
if (err) {
dev_err(priv->dev, "ECC count read failed.\n");
return;
}
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);

View File

@@ -72,12 +72,6 @@
#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
#define RETRY_RD_ERR_LOG_UC BIT(1)
#define RETRY_RD_ERR_LOG_NOOVER BIT(14)
#define RETRY_RD_ERR_LOG_EN BIT(15)
#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1))
#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0))
static struct list_head *i10nm_edac_list;
static struct res_config *res_cfg;
@@ -85,227 +79,319 @@ static int retry_rd_err_log;
static int decoding_via_mca;
static bool mem_cfg_2lm;
static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
static struct reg_rrl icx_reg_rrl_ddr = {
.set_num = 2,
.reg_num = 6,
.modes = {LRE_SCRUB, LRE_DEMAND},
.offsets = {
{0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8},
{0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0},
},
.widths = {4, 4, 4, 4, 4, 8},
.v_mask = BIT(0),
.uc_mask = BIT(1),
.over_mask = BIT(2),
.en_patspr_mask = BIT(13),
.noover_mask = BIT(14),
.en_mask = BIT(15),
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
u32 *offsets_scrub, u32 *offsets_demand,
u32 *offsets_demand2)
.cecnt_num = 4,
.cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
.cecnt_widths = {4, 4, 4, 4},
};
static struct reg_rrl spr_reg_rrl_ddr = {
.set_num = 3,
.reg_num = 6,
.modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND},
.offsets = {
{0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8},
{0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0},
{0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10},
},
.widths = {4, 4, 8, 4, 4, 8},
.v_mask = BIT(0),
.uc_mask = BIT(1),
.over_mask = BIT(2),
.en_patspr_mask = BIT(13),
.noover_mask = BIT(14),
.en_mask = BIT(15),
.cecnt_num = 4,
.cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
.cecnt_widths = {4, 4, 4, 4},
};
static struct reg_rrl spr_reg_rrl_hbm_pch0 = {
.set_num = 2,
.reg_num = 6,
.modes = {LRE_SCRUB, LRE_DEMAND},
.offsets = {
{0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8},
{0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0},
},
.widths = {4, 4, 8, 4, 4, 8},
.v_mask = BIT(0),
.uc_mask = BIT(1),
.over_mask = BIT(2),
.en_patspr_mask = BIT(13),
.noover_mask = BIT(14),
.en_mask = BIT(15),
.cecnt_num = 4,
.cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824},
.cecnt_widths = {4, 4, 4, 4},
};
static struct reg_rrl spr_reg_rrl_hbm_pch1 = {
.set_num = 2,
.reg_num = 6,
.modes = {LRE_SCRUB, LRE_DEMAND},
.offsets = {
{0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8},
{0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0},
},
.widths = {4, 4, 8, 4, 4, 8},
.v_mask = BIT(0),
.uc_mask = BIT(1),
.over_mask = BIT(2),
.en_patspr_mask = BIT(13),
.noover_mask = BIT(14),
.en_mask = BIT(15),
.cecnt_num = 4,
.cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24},
.cecnt_widths = {4, 4, 4, 4},
};
static struct reg_rrl gnr_reg_rrl_ddr = {
.set_num = 4,
.reg_num = 6,
.modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND},
.offsets = {
{0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0},
{0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8},
{0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0},
{0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8},
},
.widths = {4, 4, 8, 4, 4, 8},
.v_mask = BIT(0),
.uc_mask = BIT(1),
.over_mask = BIT(2),
.en_patspr_mask = BIT(14),
.noover_mask = BIT(15),
.en_mask = BIT(12),
.cecnt_num = 8,
.cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c},
.cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4},
};
static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
{
u32 s, d, d2;
switch (width) {
case 4:
return I10NM_GET_REG32(imc, chan, offset);
case 8:
return I10NM_GET_REG64(imc, chan, offset);
default:
i10nm_printk(KERN_ERR, "Invalid readd RRL 0x%x width %d\n", offset, width);
return 0;
}
}
s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
d = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
if (offsets_demand2)
d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);
static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val)
{
switch (width) {
case 4:
return I10NM_SET_REG32(imc, chan, offset, (u32)val);
default:
i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width);
}
}
static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
int rrl_set, bool enable, u32 *rrl_ctl)
{
enum rrl_mode mode = rrl->modes[rrl_set];
u32 offset = rrl->offsets[rrl_set][0], v;
u8 width = rrl->widths[0];
bool first, scrub;
/* First or last read error. */
first = (mode == FRE_SCRUB || mode == FRE_DEMAND);
/* Patrol scrub or on-demand read error. */
scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB);
v = read_imc_reg(imc, chan, offset, width);
if (enable) {
/* Save default configurations */
imc->chan[chan].retry_rd_err_log_s = s;
imc->chan[chan].retry_rd_err_log_d = d;
if (offsets_demand2)
imc->chan[chan].retry_rd_err_log_d2 = d2;
/* Save default configurations. */
*rrl_ctl = v;
v &= ~rrl->uc_mask;
s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
s |= RETRY_RD_ERR_LOG_EN;
d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
d |= RETRY_RD_ERR_LOG_EN;
if (first)
v |= rrl->noover_mask;
else
v &= ~rrl->noover_mask;
if (offsets_demand2) {
d2 &= ~RETRY_RD_ERR_LOG_UC;
d2 |= RETRY_RD_ERR_LOG_NOOVER;
d2 |= RETRY_RD_ERR_LOG_EN;
}
if (scrub)
v |= rrl->en_patspr_mask;
else
v &= ~rrl->en_patspr_mask;
v |= rrl->en_mask;
} else {
/* Restore default configurations */
if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
s |= RETRY_RD_ERR_LOG_UC;
if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
s |= RETRY_RD_ERR_LOG_NOOVER;
if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
s &= ~RETRY_RD_ERR_LOG_EN;
if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
d |= RETRY_RD_ERR_LOG_UC;
if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
d |= RETRY_RD_ERR_LOG_NOOVER;
if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
d &= ~RETRY_RD_ERR_LOG_EN;
/* Restore default configurations. */
if (*rrl_ctl & rrl->uc_mask)
v |= rrl->uc_mask;
if (offsets_demand2) {
if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
d2 |= RETRY_RD_ERR_LOG_UC;
if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
d2 &= ~RETRY_RD_ERR_LOG_EN;
if (first) {
if (!(*rrl_ctl & rrl->noover_mask))
v &= ~rrl->noover_mask;
} else {
if (*rrl_ctl & rrl->noover_mask)
v |= rrl->noover_mask;
}
if (scrub) {
if (!(*rrl_ctl & rrl->en_patspr_mask))
v &= ~rrl->en_patspr_mask;
} else {
if (*rrl_ctl & rrl->en_patspr_mask)
v |= rrl->en_patspr_mask;
}
if (!(*rrl_ctl & rrl->en_mask))
v &= ~rrl->en_mask;
}
I10NM_SET_REG32(imc, chan, offsets_scrub[0], s);
I10NM_SET_REG32(imc, chan, offsets_demand[0], d);
if (offsets_demand2)
I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2);
write_imc_reg(imc, chan, offset, width, v);
}
static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
bool enable, u32 *rrl_ctl)
{
for (int i = 0; i < rrl->set_num; i++)
enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i);
}
static void enable_rrls_ddr(struct skx_imc *imc, bool enable)
{
struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr;
int i, chan_num = res_cfg->ddr_chan_num;
struct skx_channel *chan = imc->chan;
if (!imc->mbase)
return;
for (i = 0; i < chan_num; i++)
enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]);
}
static void enable_rrls_hbm(struct skx_imc *imc, bool enable)
{
struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm;
int i, chan_num = res_cfg->hbm_chan_num;
struct skx_channel *chan = imc->chan;
if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1])
return;
for (i = 0; i < chan_num; i++) {
enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]);
enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]);
}
}
static void enable_retry_rd_err_log(bool enable)
{
int i, j, imc_num, chan_num;
struct skx_imc *imc;
struct skx_dev *d;
int i, imc_num;
edac_dbg(2, "\n");
list_for_each_entry(d, i10nm_edac_list, list) {
imc_num = res_cfg->ddr_imc_num;
chan_num = res_cfg->ddr_chan_num;
for (i = 0; i < imc_num; i++) {
imc = &d->imc[i];
if (!imc->mbase)
continue;
for (j = 0; j < chan_num; j++)
__enable_retry_rd_err_log(imc, j, enable,
res_cfg->offsets_scrub,
res_cfg->offsets_demand,
res_cfg->offsets_demand2);
}
for (i = 0; i < imc_num; i++)
enable_rrls_ddr(&d->imc[i], enable);
imc_num += res_cfg->hbm_imc_num;
chan_num = res_cfg->hbm_chan_num;
for (; i < imc_num; i++) {
imc = &d->imc[i];
if (!imc->mbase || !imc->hbm_mc)
continue;
for (j = 0; j < chan_num; j++) {
__enable_retry_rd_err_log(imc, j, enable,
res_cfg->offsets_scrub_hbm0,
res_cfg->offsets_demand_hbm0,
NULL);
__enable_retry_rd_err_log(imc, j, enable,
res_cfg->offsets_scrub_hbm1,
res_cfg->offsets_demand_hbm1,
NULL);
}
}
for (; i < imc_num; i++)
enable_rrls_hbm(&d->imc[i], enable);
}
}
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
int len, bool scrub_err)
{
int i, j, n, ch = res->channel, pch = res->cs & 1;
struct skx_imc *imc = &res->dev->imc[res->imc];
u32 log0, log1, log2, log3, log4;
u32 corr0, corr1, corr2, corr3;
u32 lxg0, lxg1, lxg3, lxg4;
u32 *xffsets = NULL;
u64 log2a, log5;
u64 lxg2a, lxg5;
u32 *offsets;
int n, pch;
u64 log, corr, status_mask;
struct reg_rrl *rrl;
bool scrub;
u32 offset;
u8 width;
if (!imc->mbase)
return;
if (imc->hbm_mc) {
pch = res->cs & 1;
rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr;
if (pch)
offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
res_cfg->offsets_demand_hbm1;
else
offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
res_cfg->offsets_demand_hbm0;
} else {
if (scrub_err) {
offsets = res_cfg->offsets_scrub;
} else {
offsets = res_cfg->offsets_demand;
xffsets = res_cfg->offsets_demand2;
if (!rrl)
return;
status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask;
n = snprintf(msg, len, " retry_rd_err_log[");
for (i = 0; i < rrl->set_num; i++) {
scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB);
if (scrub_err != scrub)
continue;
for (j = 0; j < rrl->reg_num && len - n > 0; j++) {
offset = rrl->offsets[i][j];
width = rrl->widths[j];
log = read_imc_reg(imc, ch, offset, width);
if (width == 4)
n += snprintf(msg + n, len - n, "%.8llx ", log);
else
n += snprintf(msg + n, len - n, "%.16llx ", log);
/* Clear RRL status if RRL in Linux control mode. */
if (retry_rd_err_log == 2 && !j && (log & status_mask))
write_imc_reg(imc, ch, offset, width, log & ~status_mask);
}
}
log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
/* Move back one space. */
n--;
n += snprintf(msg + n, len - n, "]");
if (xffsets) {
lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]);
lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]);
lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]);
lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]);
lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]);
}
if (len - n > 0) {
n += snprintf(msg + n, len - n, " correrrcnt[");
for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) {
offset = rrl->cecnt_offsets[i];
width = rrl->cecnt_widths[i];
corr = read_imc_reg(imc, ch, offset, width);
if (res_cfg->type == SPR) {
log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
log0, log1, log2a, log3, log4, log5);
if (len - n > 0) {
if (xffsets) {
lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]);
n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5);
/* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */
if (res_cfg->type <= SPR) {
n += snprintf(msg + n, len - n, "%.4llx %.4llx ",
corr & 0xffff, corr >> 16);
} else {
n += snprintf(msg + n, len - n, "]");
/* CPUs {GNR} encode one counter per CORRERRCNT register. */
if (width == 4)
n += snprintf(msg + n, len - n, "%.8llx ", corr);
else
n += snprintf(msg + n, len - n, "%.16llx ", corr);
}
}
} else {
log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
log0, log1, log2, log3, log4, log5);
}
if (imc->hbm_mc) {
if (pch) {
corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
} else {
corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
}
} else {
corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
}
if (len - n > 0)
snprintf(msg + n, len - n,
" correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
corr0 & 0xffff, corr0 >> 16,
corr1 & 0xffff, corr1 >> 16,
corr2 & 0xffff, corr2 >> 16,
corr3 & 0xffff, corr3 >> 16);
/* Clear status bits */
if (retry_rd_err_log == 2) {
if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
}
if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0);
}
/* Move back one space. */
n--;
n += snprintf(msg + n, len - n, "]");
}
}
@@ -870,8 +956,7 @@ static struct res_config i10nm_cfg0 = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
.offsets_scrub = offsets_scrub_icx,
.offsets_demand = offsets_demand_icx,
.reg_rrl_ddr = &icx_reg_rrl_ddr,
};
static struct res_config i10nm_cfg1 = {
@@ -889,8 +974,7 @@ static struct res_config i10nm_cfg1 = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
.offsets_scrub = offsets_scrub_icx,
.offsets_demand = offsets_demand_icx,
.reg_rrl_ddr = &icx_reg_rrl_ddr,
};
static struct res_config spr_cfg = {
@@ -913,13 +997,9 @@ static struct res_config spr_cfg = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x300,
.offsets_scrub = offsets_scrub_spr,
.offsets_scrub_hbm0 = offsets_scrub_spr_hbm0,
.offsets_scrub_hbm1 = offsets_scrub_spr_hbm1,
.offsets_demand = offsets_demand_spr,
.offsets_demand2 = offsets_demand2_spr,
.offsets_demand_hbm0 = offsets_demand_spr_hbm0,
.offsets_demand_hbm1 = offsets_demand_spr_hbm1,
.reg_rrl_ddr = &spr_reg_rrl_ddr,
.reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0,
.reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1,
};
static struct res_config gnr_cfg = {
@@ -937,6 +1017,7 @@ static struct res_config gnr_cfg = {
.uracu_bdf = {0, 0, 1},
.ddr_mdev_bdf = {0, 5, 1},
.sad_all_offset = 0x300,
.reg_rrl_ddr = &gnr_reg_rrl_ddr,
};
static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -1108,7 +1189,7 @@ static int __init i10nm_init(void)
mce_register_decode_chain(&i10nm_mce_dec);
skx_setup_debug("i10nm_test");
if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(true);
@@ -1128,7 +1209,7 @@ static void __exit i10nm_exit(void)
{
edac_dbg(2, "\n");
if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
skx_set_decode(NULL, NULL);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(false);

View File

@@ -90,6 +90,10 @@
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630
#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4 0xa700
/* Alder Lake-S */
#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660
#define IE31200_RANKS_PER_CHANNEL 8
#define IE31200_DIMMS_PER_CHANNEL 2
@@ -735,6 +739,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4), (kernel_ulong_t)&rpl_s_cfg},
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg},
{ 0, } /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);

View File

@@ -127,6 +127,7 @@
static const struct res_config {
bool machine_check;
/* The number of present memory controllers. */
int num_imc;
u32 imc_base;
u32 cmf_base;
@@ -240,6 +241,12 @@ static struct work_struct ecclog_work;
#define DID_ADL_N_SKU11 0x467c
#define DID_ADL_N_SKU12 0x4632
/* Compute die IDs for Arizona Beach with IBECC */
#define DID_AZB_SKU1 0x4676
/* Compute did IDs for Amston Lake with IBECC */
#define DID_ASL_SKU1 0x464a
/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1 0xa706
#define DID_RPL_P_SKU2 0xa707
@@ -595,6 +602,8 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
@@ -1201,23 +1210,21 @@ static void igen6_check(struct mem_ctl_info *mci)
irq_work_queue(&ecclog_irq_work);
}
static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
/* Check whether the memory controller is absent. */
static bool igen6_imc_absent(void __iomem *window)
{
return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
}
static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct igen6_imc *imc;
void __iomem *window;
int rc;
edac_dbg(2, "\n");
mchbar += mc * MCHBAR_SIZE;
window = ioremap(mchbar, MCHBAR_SIZE);
if (!window) {
igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
return -ENODEV;
}
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
@@ -1283,7 +1290,6 @@ fail3:
fail2:
edac_mc_free(mci);
fail:
iounmap(window);
return rc;
}
@@ -1309,6 +1315,56 @@ static void igen6_unregister_mcis(void)
}
}
static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
{
void __iomem *window;
int lmc, pmc, rc;
u64 base;
for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
base = mchbar + pmc * MCHBAR_SIZE;
window = ioremap(base, MCHBAR_SIZE);
if (!window) {
igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
rc = -ENOMEM;
goto out_unregister_mcis;
}
if (igen6_imc_absent(window)) {
iounmap(window);
edac_dbg(2, "Skip absent mc%d\n", pmc);
continue;
}
rc = igen6_register_mci(lmc, window, pdev);
if (rc)
goto out_iounmap;
/* Done, if all present MCs are detected and registered. */
if (++lmc >= res_cfg->num_imc)
break;
}
if (!lmc) {
igen6_printk(KERN_ERR, "No mc found.\n");
return -ENODEV;
}
if (lmc < res_cfg->num_imc)
igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.",
res_cfg->num_imc, lmc);
return 0;
out_iounmap:
iounmap(window);
out_unregister_mcis:
igen6_unregister_mcis();
return rc;
}
static int igen6_mem_slice_setup(u64 mchbar)
{
struct igen6_imc *imc = &igen6_pvt->imc[0];
@@ -1405,7 +1461,7 @@ static void opstate_set(const struct res_config *cfg, const struct pci_device_id
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
u64 mchbar;
int i, rc;
int rc;
edac_dbg(2, "\n");
@@ -1421,11 +1477,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
opstate_set(res_cfg, ent);
for (i = 0; i < res_cfg->num_imc; i++) {
rc = igen6_register_mci(i, mchbar, pdev);
if (rc)
goto fail2;
}
rc = igen6_register_mcis(pdev, mchbar);
if (rc)
goto fail;
if (res_cfg->num_imc > 1) {
rc = igen6_mem_slice_setup(mchbar);

View File

@@ -116,6 +116,7 @@ EXPORT_SYMBOL_GPL(skx_adxl_get);
void skx_adxl_put(void)
{
adxl_component_count = 0;
kfree(adxl_values);
kfree(adxl_msg);
}

View File

@@ -79,6 +79,47 @@
*/
#define MCACOD_EXT_MEM_ERR 0x280
/* Max RRL register sets per {,sub-,pseudo-}channel. */
#define NUM_RRL_SET 4
/* Max RRL registers per set. */
#define NUM_RRL_REG 6
/* Max correctable error count registers. */
#define NUM_CECNT_REG 8
/* Modes of RRL register set. */
enum rrl_mode {
/* Last read error from patrol scrub. */
LRE_SCRUB,
/* Last read error from demand. */
LRE_DEMAND,
/* First read error from patrol scrub. */
FRE_SCRUB,
/* First read error from demand. */
FRE_DEMAND,
};
/* RRL registers per {,sub-,pseudo-}channel. */
struct reg_rrl {
/* RRL register parts. */
int set_num, reg_num;
enum rrl_mode modes[NUM_RRL_SET];
u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
/* RRL register widths in byte per set. */
u8 widths[NUM_RRL_REG];
/* RRL control bits of the first register per set. */
u32 v_mask;
u32 uc_mask;
u32 over_mask;
u32 en_patspr_mask;
u32 noover_mask;
u32 en_mask;
/* CORRERRCNT register parts. */
int cecnt_num;
u32 cecnt_offsets[NUM_CECNT_REG];
u8 cecnt_widths[NUM_CECNT_REG];
};
/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
@@ -117,9 +158,11 @@ struct skx_dev {
struct skx_channel {
struct pci_dev *cdev;
struct pci_dev *edev;
u32 retry_rd_err_log_s;
u32 retry_rd_err_log_d;
u32 retry_rd_err_log_d2;
/*
* Two groups of RRL control registers per channel to save default RRL
* settings of two {sub-,pseudo-}channels in Linux RRL control mode.
*/
u32 rrl_ctl[2][NUM_RRL_SET];
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
@@ -232,14 +275,10 @@ struct res_config {
/* HBM mdev device BDF */
struct pci_bdf hbm_mdev_bdf;
int sad_all_offset;
/* Offsets of retry_rd_err_log registers */
u32 *offsets_scrub;
u32 *offsets_scrub_hbm0;
u32 *offsets_scrub_hbm1;
u32 *offsets_demand;
u32 *offsets_demand2;
u32 *offsets_demand_hbm0;
u32 *offsets_demand_hbm1;
/* RRL register sets per DDR channel */
struct reg_rrl *reg_rrl_ddr;
/* RRL register sets per HBM channel */
struct reg_rrl *reg_rrl_hbm[2];
};
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,