IB/ipath: Log "active" time and some errors to EEPROM
We currently track various errors; now we enhance that capability by logging some of them to EEPROM. We also now log a cumulative "active" time, defined by traffic through the InfiniPath HCA beyond the normal SM traffic. Signed-off-by: Michael Albaugh <michael.albaugh@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
committed by
Roland Dreier
parent
8e9ab3f1c9
commit
aecd3b5ab1
@@ -2005,6 +2005,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
|
|||||||
~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
|
~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
|
||||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
|
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
|
||||||
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
|
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
|
||||||
|
|
||||||
|
ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
|
||||||
|
ipath_update_eeprom_log(dd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -367,8 +367,8 @@ bail:
|
|||||||
* @len: number of bytes to receive
|
* @len: number of bytes to receive
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
|
static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
|
||||||
void *buffer, int len)
|
u8 eeprom_offset, void *buffer, int len)
|
||||||
{
|
{
|
||||||
/* compiler complains unless initialized */
|
/* compiler complains unless initialized */
|
||||||
u8 single_byte = 0;
|
u8 single_byte = 0;
|
||||||
@@ -418,6 +418,7 @@ bail:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ipath_eeprom_write - writes data to the eeprom via I2C
|
* ipath_eeprom_write - writes data to the eeprom via I2C
|
||||||
* @dd: the infinipath device
|
* @dd: the infinipath device
|
||||||
@@ -425,8 +426,8 @@ bail:
|
|||||||
* @buffer: data to write
|
* @buffer: data to write
|
||||||
* @len: number of bytes to write
|
* @len: number of bytes to write
|
||||||
*/
|
*/
|
||||||
int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
|
int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
|
||||||
const void *buffer, int len)
|
const void *buffer, int len)
|
||||||
{
|
{
|
||||||
u8 single_byte;
|
u8 single_byte;
|
||||||
int sub_len;
|
int sub_len;
|
||||||
@@ -500,6 +501,38 @@ bail:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
|
||||||
|
* are now just wrappers around the internal functions.
|
||||||
|
*/
|
||||||
|
int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
		      void *buff, int len)
{
	/*
	 * Take the EEPROM semaphore for the duration of the transfer.
	 * If the wait is interrupted, hand the non-zero result of
	 * down_interruptible() straight back without touching the device.
	 */
	int status = down_interruptible(&dd->ipath_eep_sem);

	if (status)
		return status;

	/* Semaphore held: do the real read, then release. */
	status = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
	up(&dd->ipath_eep_sem);

	return status;
}
|
||||||
|
|
||||||
|
int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
		       const void *buff, int len)
{
	/*
	 * Take the EEPROM semaphore for the duration of the transfer.
	 * If the wait is interrupted, hand the non-zero result of
	 * down_interruptible() straight back without touching the device.
	 */
	int status = down_interruptible(&dd->ipath_eep_sem);

	if (status)
		return status;

	/* Semaphore held: do the real write, then release. */
	status = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
	up(&dd->ipath_eep_sem);

	return status;
}
|
||||||
|
|
||||||
static u8 flash_csum(struct ipath_flash *ifp, int adjust)
|
static u8 flash_csum(struct ipath_flash *ifp, int adjust)
|
||||||
{
|
{
|
||||||
u8 *ip = (u8 *) ifp;
|
u8 *ip = (u8 *) ifp;
|
||||||
@@ -527,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
|
|||||||
void *buf;
|
void *buf;
|
||||||
struct ipath_flash *ifp;
|
struct ipath_flash *ifp;
|
||||||
__be64 guid;
|
__be64 guid;
|
||||||
int len;
|
int len, eep_stat;
|
||||||
u8 csum, *bguid;
|
u8 csum, *bguid;
|
||||||
int t = dd->ipath_unit;
|
int t = dd->ipath_unit;
|
||||||
struct ipath_devdata *dd0 = ipath_lookup(0);
|
struct ipath_devdata *dd0 = ipath_lookup(0);
|
||||||
@@ -571,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
|
|||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ipath_eeprom_read(dd, 0, buf, len)) {
|
down(&dd->ipath_eep_sem);
|
||||||
|
eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
|
||||||
|
up(&dd->ipath_eep_sem);
|
||||||
|
|
||||||
|
if (eep_stat) {
|
||||||
ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
|
ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
@@ -646,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
|
|||||||
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
|
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
|
||||||
(unsigned long long) be64_to_cpu(dd->ipath_guid));
|
(unsigned long long) be64_to_cpu(dd->ipath_guid));
|
||||||
|
|
||||||
|
memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
|
||||||
|
/*
|
||||||
|
* Power-on (actually "active") hours are kept as little-endian value
|
||||||
|
* in EEPROM, but as seconds in a (possibly as small as 24-bit)
|
||||||
|
* atomic_t while running.
|
||||||
|
*/
|
||||||
|
atomic_set(&dd->ipath_active_time, 0);
|
||||||
|
dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
|
||||||
|
|
||||||
done:
|
done:
|
||||||
vfree(buf);
|
vfree(buf);
|
||||||
|
|
||||||
bail:;
|
bail:;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ipath_update_eeprom_log - copy active-time and error counters to eeprom
|
||||||
|
* @dd: the infinipath device
|
||||||
|
*
|
||||||
|
* Although the time is kept as seconds in the ipath_devdata struct, it is
|
||||||
|
* rounded to hours for re-write, as we have only 16 bits in EEPROM.
|
||||||
|
* First-cut code reads whole (expected) struct ipath_flash, modifies,
|
||||||
|
* re-writes. Future direction: read/write only what we need, assuming
|
||||||
|
* that the EEPROM had to have been "good enough" for driver init, and
|
||||||
|
* if not, we aren't making it worse.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
int ipath_update_eeprom_log(struct ipath_devdata *dd)
|
||||||
|
{
|
||||||
|
void *buf;
|
||||||
|
struct ipath_flash *ifp;
|
||||||
|
int len, hi_water;
|
||||||
|
uint32_t new_time, new_hrs;
|
||||||
|
u8 csum;
|
||||||
|
int ret, idx;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
/* first, check if we actually need to do anything. */
|
||||||
|
ret = 0;
|
||||||
|
for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
|
||||||
|
if (dd->ipath_eep_st_new_errs[idx]) {
|
||||||
|
ret = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
new_time = atomic_read(&dd->ipath_active_time);
|
||||||
|
|
||||||
|
if (ret == 0 && new_time < 3600)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The quick-check above determined that there is something worthy
|
||||||
|
* of logging, so get current contents and do a more detailed idea.
|
||||||
|
*/
|
||||||
|
len = offsetof(struct ipath_flash, if_future);
|
||||||
|
buf = vmalloc(len);
|
||||||
|
ret = 1;
|
||||||
|
if (!buf) {
|
||||||
|
ipath_dev_err(dd, "Couldn't allocate memory to read %u "
|
||||||
|
"bytes from eeprom for logging\n", len);
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Grab semaphore and read current EEPROM. If we get an
|
||||||
|
* error, let go, but if not, keep it until we finish write.
|
||||||
|
*/
|
||||||
|
ret = down_interruptible(&dd->ipath_eep_sem);
|
||||||
|
if (ret) {
|
||||||
|
ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
|
||||||
|
goto free_bail;
|
||||||
|
}
|
||||||
|
ret = ipath_eeprom_internal_read(dd, 0, buf, len);
|
||||||
|
if (ret) {
|
||||||
|
up(&dd->ipath_eep_sem);
|
||||||
|
ipath_dev_err(dd, "Unable read EEPROM for logging\n");
|
||||||
|
goto free_bail;
|
||||||
|
}
|
||||||
|
ifp = (struct ipath_flash *)buf;
|
||||||
|
|
||||||
|
csum = flash_csum(ifp, 0);
|
||||||
|
if (csum != ifp->if_csum) {
|
||||||
|
up(&dd->ipath_eep_sem);
|
||||||
|
ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
|
||||||
|
csum, ifp->if_csum);
|
||||||
|
ret = 1;
|
||||||
|
goto free_bail;
|
||||||
|
}
|
||||||
|
hi_water = 0;
|
||||||
|
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
|
||||||
|
for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
|
||||||
|
int new_val = dd->ipath_eep_st_new_errs[idx];
|
||||||
|
if (new_val) {
|
||||||
|
/*
|
||||||
|
* If we have seen any errors, add to EEPROM values
|
||||||
|
* We need to saturate at 0xFF (255) and we also
|
||||||
|
* would need to adjust the checksum if we were
|
||||||
|
* trying to minimize EEPROM traffic
|
||||||
|
* Note that we add to actual current count in EEPROM,
|
||||||
|
* in case it was altered while we were running.
|
||||||
|
*/
|
||||||
|
new_val += ifp->if_errcntp[idx];
|
||||||
|
if (new_val > 0xFF)
|
||||||
|
new_val = 0xFF;
|
||||||
|
if (ifp->if_errcntp[idx] != new_val) {
|
||||||
|
ifp->if_errcntp[idx] = new_val;
|
||||||
|
hi_water = offsetof(struct ipath_flash,
|
||||||
|
if_errcntp) + idx;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* update our shadow (used to minimize EEPROM
|
||||||
|
* traffic), to match what we are about to write.
|
||||||
|
*/
|
||||||
|
dd->ipath_eep_st_errs[idx] = new_val;
|
||||||
|
dd->ipath_eep_st_new_errs[idx] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* now update active-time. We would like to round to the nearest hour
|
||||||
|
* but unless atomic_t are sure to be proper signed ints we cannot,
|
||||||
|
* because we need to account for what we "transfer" to EEPROM and
|
||||||
|
* if we log an hour at 31 minutes, then we would need to set
|
||||||
|
* active_time to -29 to accurately count the _next_ hour.
|
||||||
|
*/
|
||||||
|
if (new_time > 3600) {
|
||||||
|
new_hrs = new_time / 3600;
|
||||||
|
atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
|
||||||
|
new_hrs += dd->ipath_eep_hrs;
|
||||||
|
if (new_hrs > 0xFFFF)
|
||||||
|
new_hrs = 0xFFFF;
|
||||||
|
dd->ipath_eep_hrs = new_hrs;
|
||||||
|
if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
|
||||||
|
ifp->if_powerhour[0] = new_hrs & 0xFF;
|
||||||
|
hi_water = offsetof(struct ipath_flash, if_powerhour);
|
||||||
|
}
|
||||||
|
if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
|
||||||
|
ifp->if_powerhour[1] = new_hrs >> 8;
|
||||||
|
hi_water = offsetof(struct ipath_flash, if_powerhour)
|
||||||
|
+ 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* There is a tiny possibility that we could somehow fail to write
|
||||||
|
* the EEPROM after updating our shadows, but problems from holding
|
||||||
|
* the spinlock too long are a much bigger issue.
|
||||||
|
*/
|
||||||
|
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
|
||||||
|
if (hi_water) {
|
||||||
|
/* we made some change to the data, uopdate cksum and write */
|
||||||
|
csum = flash_csum(ifp, 1);
|
||||||
|
ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
|
||||||
|
}
|
||||||
|
up(&dd->ipath_eep_sem);
|
||||||
|
if (ret)
|
||||||
|
ipath_dev_err(dd, "Failed updating EEPROM\n");
|
||||||
|
|
||||||
|
free_bail:
|
||||||
|
vfree(buf);
|
||||||
|
bail:
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ipath_inc_eeprom_err - increment one of the four error counters
|
||||||
|
* that are logged to EEPROM.
|
||||||
|
* @dd: the infinipath device
|
||||||
|
* @eidx: 0..3, the counter to increment
|
||||||
|
* @incr: how much to add
|
||||||
|
*
|
||||||
|
* Each counter is 8-bits, and saturates at 255 (0xFF). They
|
||||||
|
* are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
|
||||||
|
* is called, but it can only be called in a context that allows sleep.
|
||||||
|
* This function can be called even at interrupt level.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
|
||||||
|
{
|
||||||
|
uint new_val;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
|
||||||
|
new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
|
||||||
|
if (new_val > 255)
|
||||||
|
new_val = 255;
|
||||||
|
dd->ipath_eep_st_new_errs[eidx] = new_val;
|
||||||
|
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -440,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
|
|||||||
u32 bits, ctrl;
|
u32 bits, ctrl;
|
||||||
int isfatal = 0;
|
int isfatal = 0;
|
||||||
char bitsmsg[64];
|
char bitsmsg[64];
|
||||||
|
int log_idx;
|
||||||
|
|
||||||
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
|
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
|
||||||
|
|
||||||
@@ -468,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
|
|||||||
|
|
||||||
hwerrs &= dd->ipath_hwerrmask;
|
hwerrs &= dd->ipath_hwerrmask;
|
||||||
|
|
||||||
|
/* We log some errors to EEPROM, check if we have any of those. */
|
||||||
|
for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
|
||||||
|
if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
|
||||||
|
ipath_inc_eeprom_err(dd, log_idx, 1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* make sure we get this much out, unless told to be quiet,
|
* make sure we get this much out, unless told to be quiet,
|
||||||
* it's a parity error we may recover from,
|
* it's a parity error we may recover from,
|
||||||
@@ -1171,6 +1177,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
|
|||||||
|
|
||||||
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
|
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
|
||||||
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
|
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
|
||||||
|
* 2 is Some Misc, 3 is reserved for future.
|
||||||
|
*/
|
||||||
|
dd->ipath_eep_st_masks[0].hwerrs_to_log =
|
||||||
|
INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
|
||||||
|
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
|
||||||
|
|
||||||
|
dd->ipath_eep_st_masks[1].hwerrs_to_log =
|
||||||
|
INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
|
||||||
|
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
|
||||||
|
|
||||||
|
dd->ipath_eep_st_masks[2].errs_to_log =
|
||||||
|
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -340,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
|
|||||||
u32 bits, ctrl;
|
u32 bits, ctrl;
|
||||||
int isfatal = 0;
|
int isfatal = 0;
|
||||||
char bitsmsg[64];
|
char bitsmsg[64];
|
||||||
|
int log_idx;
|
||||||
|
|
||||||
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
|
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
|
||||||
if (!hwerrs) {
|
if (!hwerrs) {
|
||||||
@@ -367,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
|
|||||||
|
|
||||||
hwerrs &= dd->ipath_hwerrmask;
|
hwerrs &= dd->ipath_hwerrmask;
|
||||||
|
|
||||||
|
/* We log some errors to EEPROM, check if we have any of those. */
|
||||||
|
for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
|
||||||
|
if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
|
||||||
|
ipath_inc_eeprom_err(dd, log_idx, 1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* make sure we get this much out, unless told to be quiet,
|
* make sure we get this much out, unless told to be quiet,
|
||||||
* or it's occurred within the last 5 seconds
|
* or it's occurred within the last 5 seconds
|
||||||
@@ -950,6 +956,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
|
|||||||
|
|
||||||
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
|
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
|
||||||
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
|
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
|
||||||
|
* 2 is Some Misc, 3 is reserved for future.
|
||||||
|
*/
|
||||||
|
dd->ipath_eep_st_masks[0].hwerrs_to_log =
|
||||||
|
INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
|
||||||
|
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
|
||||||
|
|
||||||
|
/* Ignore errors in PIO/PBC on systems with unordered write-combining */
|
||||||
|
if (ipath_unordered_wc())
|
||||||
|
dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
|
||||||
|
|
||||||
|
dd->ipath_eep_st_masks[1].hwerrs_to_log =
|
||||||
|
INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
|
||||||
|
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
|
||||||
|
|
||||||
|
dd->ipath_eep_st_masks[2].errs_to_log =
|
||||||
|
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup the MSI stuff again after a reset. I'd like to just call
|
/* setup the MSI stuff again after a reset. I'd like to just call
|
||||||
|
@@ -341,6 +341,8 @@ static int init_chip_first(struct ipath_devdata *dd,
|
|||||||
spin_lock_init(&dd->ipath_tid_lock);
|
spin_lock_init(&dd->ipath_tid_lock);
|
||||||
|
|
||||||
spin_lock_init(&dd->ipath_gpio_lock);
|
spin_lock_init(&dd->ipath_gpio_lock);
|
||||||
|
spin_lock_init(&dd->ipath_eep_st_lock);
|
||||||
|
sema_init(&dd->ipath_eep_sem, 1);
|
||||||
|
|
||||||
done:
|
done:
|
||||||
*pdp = pd;
|
*pdp = pd;
|
||||||
|
@@ -505,6 +505,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
|
|||||||
int i, iserr = 0;
|
int i, iserr = 0;
|
||||||
int chkerrpkts = 0, noprint = 0;
|
int chkerrpkts = 0, noprint = 0;
|
||||||
unsigned supp_msgs;
|
unsigned supp_msgs;
|
||||||
|
int log_idx;
|
||||||
|
|
||||||
supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
|
supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
|
||||||
|
|
||||||
@@ -518,6 +519,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
|
|||||||
if (errs & INFINIPATH_E_HARDWARE) {
|
if (errs & INFINIPATH_E_HARDWARE) {
|
||||||
/* reuse same msg buf */
|
/* reuse same msg buf */
|
||||||
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
|
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
|
||||||
|
} else {
|
||||||
|
u64 mask;
|
||||||
|
for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
|
||||||
|
mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
|
||||||
|
if (errs & mask)
|
||||||
|
ipath_inc_eeprom_err(dd, log_idx, 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!noprint && (errs & ~dd->ipath_e_bitsextant))
|
if (!noprint && (errs & ~dd->ipath_e_bitsextant))
|
||||||
|
@@ -57,6 +57,24 @@
|
|||||||
extern struct infinipath_stats ipath_stats;
|
extern struct infinipath_stats ipath_stats;
|
||||||
|
|
||||||
#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
|
#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
|
||||||
|
/*
|
||||||
|
* First-cut criterion for "device is active" is
|
||||||
|
* two thousand dwords combined Tx, Rx traffic per
|
||||||
|
* 5-second interval. SMA packets are 64 dwords,
|
||||||
|
* and occur "a few per second", presumably each way.
|
||||||
|
*/
|
||||||
|
#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
|
||||||
|
/*
|
||||||
|
* Struct used to indicate which errors are logged in each of the
|
||||||
|
* error-counters that are logged to EEPROM. A counter is incremented
|
||||||
|
* _once_ (saturating at 255) for each event with any bits set in
|
||||||
|
* the error or hwerror register masks below.
|
||||||
|
*/
|
||||||
|
#define IPATH_EEP_LOG_CNT (4)
|
||||||
|
struct ipath_eep_log_mask {
|
||||||
|
u64 errs_to_log;
|
||||||
|
u64 hwerrs_to_log;
|
||||||
|
};
|
||||||
|
|
||||||
struct ipath_portdata {
|
struct ipath_portdata {
|
||||||
void **port_rcvegrbuf;
|
void **port_rcvegrbuf;
|
||||||
@@ -588,6 +606,24 @@ struct ipath_devdata {
|
|||||||
/* Used to flash LEDs in override mode */
|
/* Used to flash LEDs in override mode */
|
||||||
struct timer_list ipath_led_override_timer;
|
struct timer_list ipath_led_override_timer;
|
||||||
|
|
||||||
|
/* Support (including locks) for EEPROM logging of errors and time */
|
||||||
|
/* control access to actual counters, timer */
|
||||||
|
spinlock_t ipath_eep_st_lock;
|
||||||
|
/* control high-level access to EEPROM */
|
||||||
|
struct semaphore ipath_eep_sem;
|
||||||
|
/* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
|
||||||
|
uint64_t ipath_traffic_wds;
|
||||||
|
/* active time is kept in seconds, but logged in hours */
|
||||||
|
atomic_t ipath_active_time;
|
||||||
|
/* Below are nominal shadow of EEPROM, new since last EEPROM update */
|
||||||
|
uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
|
||||||
|
uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
|
||||||
|
uint16_t ipath_eep_hrs;
|
||||||
|
/*
|
||||||
|
* masks for which bits of errs, hwerrs that cause
|
||||||
|
* each of the counters to increment.
|
||||||
|
*/
|
||||||
|
struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Private data for file operations */
|
/* Private data for file operations */
|
||||||
@@ -726,6 +762,8 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
|
|||||||
void ipath_init_iba6120_funcs(struct ipath_devdata *);
|
void ipath_init_iba6120_funcs(struct ipath_devdata *);
|
||||||
void ipath_init_iba6110_funcs(struct ipath_devdata *);
|
void ipath_init_iba6110_funcs(struct ipath_devdata *);
|
||||||
void ipath_get_eeprom_info(struct ipath_devdata *);
|
void ipath_get_eeprom_info(struct ipath_devdata *);
|
||||||
|
int ipath_update_eeprom_log(struct ipath_devdata *dd);
|
||||||
|
void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
|
||||||
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
|
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
|
||||||
void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
|
void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
|
||||||
|
|
||||||
|
@@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
|
|||||||
u64 val64;
|
u64 val64;
|
||||||
unsigned long t0, t1;
|
unsigned long t0, t1;
|
||||||
u64 ret;
|
u64 ret;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
t0 = jiffies;
|
t0 = jiffies;
|
||||||
/* If fast increment counters are only 32 bits, snapshot them,
|
/* If fast increment counters are only 32 bits, snapshot them,
|
||||||
@@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
|
|||||||
if (creg == dd->ipath_cregs->cr_wordsendcnt) {
|
if (creg == dd->ipath_cregs->cr_wordsendcnt) {
|
||||||
if (val != dd->ipath_lastsword) {
|
if (val != dd->ipath_lastsword) {
|
||||||
dd->ipath_sword += val - dd->ipath_lastsword;
|
dd->ipath_sword += val - dd->ipath_lastsword;
|
||||||
|
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
|
||||||
|
dd->ipath_traffic_wds += val - dd->ipath_lastsword;
|
||||||
|
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
|
||||||
dd->ipath_lastsword = val;
|
dd->ipath_lastsword = val;
|
||||||
}
|
}
|
||||||
val64 = dd->ipath_sword;
|
val64 = dd->ipath_sword;
|
||||||
} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
|
} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
|
||||||
if (val != dd->ipath_lastrword) {
|
if (val != dd->ipath_lastrword) {
|
||||||
dd->ipath_rword += val - dd->ipath_lastrword;
|
dd->ipath_rword += val - dd->ipath_lastrword;
|
||||||
|
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
|
||||||
|
dd->ipath_traffic_wds += val - dd->ipath_lastrword;
|
||||||
|
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
|
||||||
dd->ipath_lastrword = val;
|
dd->ipath_lastrword = val;
|
||||||
}
|
}
|
||||||
val64 = dd->ipath_rword;
|
val64 = dd->ipath_rword;
|
||||||
@@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
|
|||||||
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
|
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
|
||||||
u32 val;
|
u32 val;
|
||||||
static unsigned cnt;
|
static unsigned cnt;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* don't access the chip while running diags, or memory diags can
|
* don't access the chip while running diags, or memory diags can
|
||||||
@@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque)
|
|||||||
/* but re-arm the timer, for diags case; won't hurt other */
|
/* but re-arm the timer, for diags case; won't hurt other */
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We now try to maintain a "active timer", based on traffic
|
||||||
|
* exceeding a threshold, so we need to check the word-counts
|
||||||
|
* even if they are 64-bit.
|
||||||
|
*/
|
||||||
|
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
|
||||||
|
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
|
||||||
|
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
|
||||||
|
if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
|
||||||
|
atomic_add(5, &dd->ipath_active_time); /* S/B #define */
|
||||||
|
dd->ipath_traffic_wds = 0;
|
||||||
|
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
|
||||||
|
|
||||||
if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
|
if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
|
||||||
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
|
|
||||||
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
|
|
||||||
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
|
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
|
||||||
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
|
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
|
||||||
}
|
}
|
||||||
|
@@ -613,6 +613,26 @@ static ssize_t store_led_override(struct device *dev,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t show_logged_errs(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct ipath_devdata *dd = dev_get_drvdata(dev);
|
||||||
|
int idx, count;
|
||||||
|
|
||||||
|
/* force consistency with actual EEPROM */
|
||||||
|
if (ipath_update_eeprom_log(dd) != 0)
|
||||||
|
return -ENXIO;
|
||||||
|
|
||||||
|
count = 0;
|
||||||
|
for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
|
||||||
|
count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
|
||||||
|
dd->ipath_eep_st_errs[idx],
|
||||||
|
idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
|
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
|
||||||
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
|
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
|
||||||
@@ -643,6 +663,7 @@ static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
|
|||||||
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
|
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
|
||||||
static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
|
static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
|
||||||
static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
|
static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
|
||||||
|
static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
|
||||||
|
|
||||||
static struct attribute *dev_attributes[] = {
|
static struct attribute *dev_attributes[] = {
|
||||||
&dev_attr_guid.attr,
|
&dev_attr_guid.attr,
|
||||||
@@ -660,6 +681,7 @@ static struct attribute *dev_attributes[] = {
|
|||||||
&dev_attr_enabled.attr,
|
&dev_attr_enabled.attr,
|
||||||
&dev_attr_rx_pol_inv.attr,
|
&dev_attr_rx_pol_inv.attr,
|
||||||
&dev_attr_led_override.attr,
|
&dev_attr_led_override.attr,
|
||||||
|
&dev_attr_logged_errors.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user