IB/mlx4: Make sure inline data segments don't cross a 64 byte boundary
Inline data segments in send WQEs are not allowed to cross a 64 byte boundary. We use inline data segments to hold the UD headers for MLX QPs (QP0 and QP1). A send with GRH on QP1 will have a UD header that is too big to fit in a single inline data segment without crossing a 64 byte boundary, so split the header into two inline data segments.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
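For illustration, here is a minimal standalone sketch of the split arithmetic the patch introduces in build_mlx_header(). The values are assumptions, not taken from this diff: the payload offset of 20 (a 16-byte control/MLX segment plus the 4-byte inline segment header, per the "16 mod 64" comment in the patch) and the 68-byte UD-header-with-GRH size (LRH 8 + GRH 40 + BTH 12 + DETH 8). Only MLX4_INLINE_ALIGN = 64 comes from the patch itself.

/*
 * Sketch of the 64-byte-boundary split, using assumed example values.
 */
#include <stdio.h>

#define MLX4_INLINE_ALIGN 64

int main(void)
{
	/* Example: payload of the first inline segment starts at offset
	 * 20 in the WQE (16-byte control/MLX segment + 4-byte inline
	 * segment header). */
	unsigned long payload_off = 20;

	/* Example: UD header with GRH = LRH(8) + GRH(40) + BTH(12) + DETH(8). */
	int header_size = 68;

	/* Space left up to the next 64-byte boundary. */
	int spc = MLX4_INLINE_ALIGN - (payload_off & (MLX4_INLINE_ALIGN - 1));

	if (header_size <= spc)
		printf("one inline segment: %d bytes\n", header_size);
	else
		printf("two inline segments: %d + %d bytes\n",
		       spc, header_size - spc);
	return 0;
}

With these example numbers, spc comes out to 44, so the 68-byte header is split 44 + 24 across two inline segments, and neither segment's data crosses a 64 byte boundary.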
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -192,6 +192,8 @@ static int send_wqe_overhead(enum ib_qp_type type)
 	case IB_QPT_GSI:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			ALIGN(MLX4_IB_UD_HEADER_SIZE +
+			      DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
+					   MLX4_INLINE_ALIGN) *
 			      sizeof (struct mlx4_wqe_inline_seg),
 			      sizeof (struct mlx4_wqe_data_seg)) +
 			ALIGN(4 +
@@ -1049,6 +1051,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 	u16 pkey;
 	int send_size;
 	int header_size;
+	int spc;
 	int i;
 
 	send_size = 0;
@@ -1124,10 +1127,43 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 		printk("\n");
 	}
 
-	inl->byte_count = cpu_to_be32(1 << 31 | header_size);
-	memcpy(inl + 1, sqp->header_buf, header_size);
+	/*
+	 * Inline data segments may not cross a 64 byte boundary.  If
+	 * our UD header is bigger than the space available up to the
+	 * next 64 byte boundary in the WQE, use two inline data
+	 * segments to hold the UD header.
+	 */
+	spc = MLX4_INLINE_ALIGN -
+		((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+	if (header_size <= spc) {
+		inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+		memcpy(inl + 1, sqp->header_buf, header_size);
+		i = 1;
+	} else {
+		inl->byte_count = cpu_to_be32(1 << 31 | spc);
+		memcpy(inl + 1, sqp->header_buf, spc);
 
-	return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+		inl = (void *) (inl + 1) + spc;
+		memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
+		/*
+		 * Need a barrier here to make sure all the data is
+		 * visible before the byte_count field is set.
+		 * Otherwise the HCA prefetcher could grab the 64-byte
+		 * chunk with this inline segment and get a valid (!=
+		 * 0xffffffff) byte count but stale data, and end up
+		 * generating a packet with bad headers.
+		 *
+		 * The first inline segment's byte_count field doesn't
+		 * need a barrier, because it comes after a
+		 * control/MLX segment and therefore is at an offset
+		 * of 16 mod 64.
+		 */
+		wmb();
+		inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
+		i = 2;
+	}
+
+	return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
 }
 
 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -269,6 +269,10 @@ struct mlx4_wqe_data_seg {
 	__be64 addr;
 };
 
+enum {
+	MLX4_INLINE_ALIGN	= 64,
+};
+
 struct mlx4_wqe_inline_seg {
 	__be32 byte_count;
 };
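A note on the send_wqe_overhead() hunk above: the new DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, MLX4_INLINE_ALIGN) factor reserves room for one 4-byte struct mlx4_wqe_inline_seg header per segment the split can produce. Taking the same assumed 68-byte header as in the sketch, DIV_ROUND_UP(68, 64) = 2, so the WQE size calculation now budgets for two inline segment headers instead of one.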