mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
drm/amdgpu: implement get_retire_flip_bits for UMC v12
The RAS bad page retire flip bits can be set per vram type, vram vendor and NPS mode. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -1502,7 +1502,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
|
||||
adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
|
||||
adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
|
||||
adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
|
||||
adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
|
||||
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
|
||||
adev->umc.ras = &umc_v12_0_ras;
|
||||
break;
|
||||
|
||||
@@ -174,6 +174,49 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
|
||||
umc_v12_0_reset_error_count(adev);
|
||||
}
|
||||
|
||||
static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev)
|
||||
{
|
||||
enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
|
||||
uint32_t vram_type = adev->gmc.vram_type;
|
||||
struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits);
|
||||
|
||||
if (adev->gmc.gmc_funcs->query_mem_partition_mode)
|
||||
nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
|
||||
|
||||
/* default setting */
|
||||
flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
|
||||
flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
|
||||
flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
|
||||
flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
|
||||
flip_bits->bit_num = 4;
|
||||
|
||||
switch (vram_type) {
|
||||
case AMDGPU_VRAM_TYPE_HBM:
|
||||
/* other nps modes are taken as nps1 */
|
||||
if (nps == AMDGPU_NPS2_PARTITION_MODE) {
|
||||
flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
|
||||
flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
|
||||
flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
|
||||
flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
|
||||
}
|
||||
|
||||
if (nps == AMDGPU_NPS4_PARTITION_MODE) {
|
||||
flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
|
||||
flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
|
||||
flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
|
||||
flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
dev_warn(adev->dev,
|
||||
"Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
|
||||
}
|
||||
|
||||
static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data,
|
||||
struct ta_ras_query_address_input *addr_in,
|
||||
@@ -182,11 +225,10 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
|
||||
{
|
||||
uint32_t col, col_lower, row, row_lower, row_high, bank;
|
||||
uint32_t channel_index = 0, umc_inst = 0;
|
||||
uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS];
|
||||
uint32_t i, bit_num, retire_unit, *flip_bits;
|
||||
uint64_t soc_pa, column, err_addr;
|
||||
struct ta_ras_query_address_output addr_out_tmp;
|
||||
struct ta_ras_query_address_output *paddr_out;
|
||||
enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
|
||||
int ret = 0;
|
||||
|
||||
if (!addr_out)
|
||||
@@ -211,34 +253,15 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
|
||||
umc_inst = addr_in->ma.umc_inst;
|
||||
}
|
||||
|
||||
loop_bits[0] = UMC_V12_0_PA_C2_BIT;
|
||||
loop_bits[1] = UMC_V12_0_PA_C3_BIT;
|
||||
loop_bits[2] = UMC_V12_0_PA_C4_BIT;
|
||||
loop_bits[3] = UMC_V12_0_PA_R13_BIT;
|
||||
|
||||
if (adev->gmc.gmc_funcs->query_mem_partition_mode)
|
||||
nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
|
||||
|
||||
/* other nps modes are taken as nps1 */
|
||||
if (nps == AMDGPU_NPS2_PARTITION_MODE) {
|
||||
loop_bits[0] = UMC_V12_0_PA_CH5_BIT;
|
||||
loop_bits[1] = UMC_V12_0_PA_C2_BIT;
|
||||
loop_bits[2] = UMC_V12_0_PA_B1_BIT;
|
||||
loop_bits[3] = UMC_V12_0_PA_R12_BIT;
|
||||
}
|
||||
|
||||
if (nps == AMDGPU_NPS4_PARTITION_MODE) {
|
||||
loop_bits[0] = UMC_V12_0_PA_CH4_BIT;
|
||||
loop_bits[1] = UMC_V12_0_PA_CH5_BIT;
|
||||
loop_bits[2] = UMC_V12_0_PA_B0_BIT;
|
||||
loop_bits[3] = UMC_V12_0_PA_R11_BIT;
|
||||
}
|
||||
flip_bits = adev->umc.flip_bits.flip_bits_in_pa;
|
||||
bit_num = adev->umc.flip_bits.bit_num;
|
||||
retire_unit = adev->umc.retire_unit;
|
||||
|
||||
soc_pa = paddr_out->pa.pa;
|
||||
channel_index = paddr_out->pa.channel_idx;
|
||||
/* clear loop bits in soc physical address */
|
||||
for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++)
|
||||
soc_pa &= ~BIT_ULL(loop_bits[i]);
|
||||
for (i = 0; i < bit_num; i++)
|
||||
soc_pa &= ~BIT_ULL(flip_bits[i]);
|
||||
|
||||
paddr_out->pa.pa = soc_pa;
|
||||
/* get column bit 0 and 1 in mca address */
|
||||
@@ -259,10 +282,10 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
|
||||
goto out;
|
||||
|
||||
/* loop for all possibilities of retired bits */
|
||||
for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) {
|
||||
for (column = 0; column < retire_unit; column++) {
|
||||
soc_pa = paddr_out->pa.pa;
|
||||
for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++)
|
||||
soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]);
|
||||
for (i = 0; i < bit_num; i++)
|
||||
soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]);
|
||||
|
||||
col = ((column & 0x7) << 2) | col_lower;
|
||||
/* add row bit 13 */
|
||||
@@ -684,5 +707,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
|
||||
.update_ecc_status = umc_v12_0_update_ecc_status,
|
||||
.convert_ras_err_addr = umc_v12_0_convert_error_address,
|
||||
.get_die_id_from_pa = umc_v12_0_get_die_id,
|
||||
.get_retire_flip_bits = umc_v12_0_get_retire_flip_bits,
|
||||
};
|
||||
|
||||
|
||||
@@ -55,8 +55,6 @@
|
||||
#define UMC_V12_0_NA_MAP_PA_NUM 8
|
||||
/* R13 bit shift should be considered, double the number */
|
||||
#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2)
|
||||
/* C2, C3, C4, R13, four bits in MCA address are looped in retirement */
|
||||
#define UMC_V12_0_RETIRE_LOOP_BITS 4
|
||||
|
||||
/* column bits in SOC physical address */
|
||||
#define UMC_V12_0_PA_C2_BIT 15
|
||||
|
||||
Reference in New Issue
Block a user