mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
drm/amdgpu: refine eeprom data check
Add an EEPROM data checksum check before driver unload. When the check fails, reset the EEPROM and save the correct data to it.
Signed-off-by: ganglxie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -2512,6 +2512,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
|
||||
struct drm_device *dev = pci_get_drvdata(pdev);
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
|
||||
amdgpu_ras_eeprom_check_and_recover(adev);
|
||||
amdgpu_xcp_dev_unplug(adev);
|
||||
amdgpu_gmc_prepare_nps_mode_change(adev);
|
||||
drm_dev_unplug(dev);
|
||||
|
||||
@@ -1531,3 +1531,31 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
|
||||
|
||||
return res < 0 ? res : 0;
|
||||
}
|
||||
|
||||
void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
struct amdgpu_ras_eeprom_control *control;
|
||||
int res;
|
||||
|
||||
if (!__is_ras_eeprom_supported(adev) || !ras)
|
||||
return;
|
||||
control = &ras->eeprom_control;
|
||||
if (!control->is_eeprom_valid)
|
||||
return;
|
||||
res = __verify_ras_table_checksum(control);
|
||||
if (res) {
|
||||
dev_warn(adev->dev,
|
||||
"RAS table incorrect checksum or error:%d, try to recover\n",
|
||||
res);
|
||||
if (!amdgpu_ras_eeprom_reset_table(control))
|
||||
if (!amdgpu_ras_save_bad_pages(adev, NULL))
|
||||
if (!__verify_ras_table_checksum(control)) {
|
||||
dev_info(adev->dev, "RAS table recovery succeed\n");
|
||||
return;
|
||||
}
|
||||
dev_err(adev->dev, "RAS table recovery failed\n");
|
||||
control->is_eeprom_valid = false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -161,6 +161,8 @@ void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
|
||||
|
||||
int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
|
||||
|
||||
/*
 * Verify the RAS EEPROM table checksum; on failure, reset the table, save
 * the bad pages and re-verify, marking the table invalid if that fails.
 */
void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);
|
||||
|
||||
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
|
||||
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user