IB/mthca: Recover from catastrophic errors
Trigger device remove and then add when a catastrophic error is detected in hardware. This, in turn, will cause a device reset, which we hope will recover from the catastrophic condition. Since this might interfere with debugging the root cause, add a module option to suppress this behaviour. Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il> Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
committed by
Roland Dreier
parent
07eeec0627
commit
b3b30f5e8a
@@ -45,6 +45,7 @@
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/list.h>
|
||||
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
@@ -283,8 +284,11 @@ struct mthca_catas_err {
|
||||
unsigned long stop;
|
||||
u32 size;
|
||||
struct timer_list timer;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
extern struct mutex mthca_device_mutex;
|
||||
|
||||
struct mthca_dev {
|
||||
struct ib_device ib_dev;
|
||||
struct pci_dev *pdev;
|
||||
@@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthca_dev *dev);
|
||||
|
||||
void mthca_start_catas_poll(struct mthca_dev *dev);
|
||||
void mthca_stop_catas_poll(struct mthca_dev *dev);
|
||||
int __mthca_restart_one(struct pci_dev *pdev);
|
||||
int mthca_catas_init(void);
|
||||
void mthca_catas_cleanup(void);
|
||||
|
||||
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
|
||||
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
|
||||
|
Reference in New Issue
Block a user