mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	net: hns3: fix to stop multiple HNS reset due to the AER changes
The commit bfcb79fca1 ("PCI/ERR: Run error recovery callbacks for all affected devices") affected the non-fatal error recovery logic for the HNS and RDMA devices. This is because each HNS PF under the PCIe bus receives callbacks from the AER driver when an error is reported for one of the PFs. This causes unwanted PF resets because the HNS decides which PF to reset based on the reset type set. The HNS error handling code sets the reset type based on the hw error type detected. This patch provides a fix for the above issue for the recovery of the hw errors in the HNS and RDMA devices. This patch needs backporting to the kernel v5.0+. Fixes: 332fbf5765 ("net: hns3: add handling of hw ras errors using new set of commands") Reported-by: Xiaofei Tan <tanxiaofei@huawei.com> Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									083b78a9ed
								
							
						
					
					
						commit
						69b51bbb03
					
				| @ -194,6 +194,7 @@ struct hnae3_ae_dev { | |||||||
| 	const struct hnae3_ae_ops *ops; | 	const struct hnae3_ae_ops *ops; | ||||||
| 	struct list_head node; | 	struct list_head node; | ||||||
| 	u32 flag; | 	u32 flag; | ||||||
|  | 	u8 override_pci_need_reset; /* fix to stop multiple reset happening */ | ||||||
| 	enum hnae3_dev_type dev_type; | 	enum hnae3_dev_type dev_type; | ||||||
| 	enum hnae3_reset_type reset_type; | 	enum hnae3_reset_type reset_type; | ||||||
| 	void *priv; | 	void *priv; | ||||||
|  | |||||||
| @ -1850,7 +1850,9 @@ static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev) | |||||||
| 
 | 
 | ||||||
| 	/* request the reset */ | 	/* request the reset */ | ||||||
| 	if (ae_dev->ops->reset_event) { | 	if (ae_dev->ops->reset_event) { | ||||||
| 		ae_dev->ops->reset_event(pdev, NULL); | 		if (!ae_dev->override_pci_need_reset) | ||||||
|  | 			ae_dev->ops->reset_event(pdev, NULL); | ||||||
|  | 
 | ||||||
| 		return PCI_ERS_RESULT_RECOVERED; | 		return PCI_ERS_RESULT_RECOVERED; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1317,8 +1317,10 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) | |||||||
| 		hclge_handle_all_ras_errors(hdev); | 		hclge_handle_all_ras_errors(hdev); | ||||||
| 	} else { | 	} else { | ||||||
| 		if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || | 		if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || | ||||||
| 		    hdev->pdev->revision < 0x21) | 		    hdev->pdev->revision < 0x21) { | ||||||
|  | 			ae_dev->override_pci_need_reset = 1; | ||||||
| 			return PCI_ERS_RESULT_RECOVERED; | 			return PCI_ERS_RESULT_RECOVERED; | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { | 	if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { | ||||||
| @ -1327,8 +1329,11 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (status & HCLGE_RAS_REG_NFE_MASK || | 	if (status & HCLGE_RAS_REG_NFE_MASK || | ||||||
| 	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) | 	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { | ||||||
|  | 		ae_dev->override_pci_need_reset = 0; | ||||||
| 		return PCI_ERS_RESULT_NEED_RESET; | 		return PCI_ERS_RESULT_NEED_RESET; | ||||||
|  | 	} | ||||||
|  | 	ae_dev->override_pci_need_reset = 1; | ||||||
| 
 | 
 | ||||||
| 	return PCI_ERS_RESULT_RECOVERED; | 	return PCI_ERS_RESULT_RECOVERED; | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Shiju Jose
						Shiju Jose