Merge branch 'tid-read' into hfi1-tid

This is the series for adding TID RDMA read. Kaike put in a lot of
effort into making this more consumable for review so special thanks to
him.

Allocating resources and tracing are separated out followed by patches
which build up the read request. Then we have the patches to receive
incoming TID RDMA read requests and handle integration with the RC
protocol.

See the cover letter of the original posting for more of a detailed
overview of TID.

https://www.spinics.net/lists/linux-rdma/msg66611.html

* tid-read:
  IB/hfi1: Add static trace for TID RDMA READ protocol
  IB/hfi1: Enable TID RDMA READ protocol
  IB/hfi1: Add interlock between a TID RDMA request and other requests
  IB/hfi1: Integrate TID RDMA READ protocol into RC protocol
  IB/hfi1: Increment the retry timeout value for TID RDMA READ request
  IB/hfi1: Add functions for restarting TID RDMA READ request
  IB/hfi1: Add TID RDMA handlers
  IB/hfi1: Add functions to receive TID RDMA READ response
  IB/hfi1: Add a function to build TID RDMA READ response
  IB/hfi1: Add functions to receive TID RDMA READ request
  IB/hfi1: Set PbcInsertHcrc for TID RDMA packets
  IB/hfi1: Add functions to build TID RDMA READ request
  IB/hfi1: Add static trace for flow and TID management functions
  IB/hfi1: Add the counter n_tidwait
  IB/hfi1: TID RDMA RcvArray programming and TID allocation
  IB/hfi1: TID RDMA flow allocation
  IB/hfi: Move RC functions into a header file

Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Doug Ledford 2019-02-05 17:59:43 -05:00
commit a2f3bde881
27 changed files with 4668 additions and 172 deletions

View File

@ -4253,6 +4253,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
access_sw_kmem_wait),
[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
hfi1_access_sw_tid_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
access_sw_send_schedule),
[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",

View File

@ -927,6 +927,7 @@ enum {
C_SW_PIO_WAIT,
C_SW_PIO_DRAIN,
C_SW_KMEM_WAIT,
C_SW_TID_WAIT,
C_SW_SEND_SCHED,
C_SDMA_DESC_FETCHED_CNT,
C_SDMA_INT_CNT,

View File

@ -340,6 +340,10 @@ struct diag_pkt {
#define HFI1_PSM_IOC_BASE_SEQ 0x0
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
#define HFI1_KDETH_BTH_SEQ_SHIFT 11
#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
return __le64_to_cpu(*((__le64 *)rbuf));

View File

@ -1575,25 +1575,32 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
return -EINVAL;
}
void handle_eflags(struct hfi1_packet *packet)
static void show_eflags_errs(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
u32 rte = rhf_rcv_type_err(packet->rhf);
dd_dev_err(rcd->dd,
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
rcd->ctxt, packet->rhf,
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
packet->rhf & RHF_DC_ERR ? "dc " : "",
packet->rhf & RHF_TID_ERR ? "tid " : "",
packet->rhf & RHF_LEN_ERR ? "len " : "",
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
rte);
}
void handle_eflags(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
rcv_hdrerr(rcd, rcd->ppd, packet);
if (rhf_err_flags(packet->rhf))
dd_dev_err(rcd->dd,
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
rcd->ctxt, packet->rhf,
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
packet->rhf & RHF_DC_ERR ? "dc " : "",
packet->rhf & RHF_TID_ERR ? "tid " : "",
packet->rhf & RHF_LEN_ERR ? "len " : "",
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
rte);
show_eflags_errs(packet);
}
/*
@ -1699,11 +1706,14 @@ static int kdeth_process_expected(struct hfi1_packet *packet)
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
dd_dev_err(packet->rcd->dd,
"Unhandled expected packet received. Dropping.\n");
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
return RHF_RCV_CONTINUE;
}
hfi1_kdeth_expected_rcv(packet);
return RHF_RCV_CONTINUE;
}
@ -1712,11 +1722,17 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
show_eflags_errs(packet);
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
return RHF_RCV_CONTINUE;
}
hfi1_kdeth_eager_rcv(packet);
return RHF_RCV_CONTINUE;
}

View File

@ -198,6 +198,14 @@ struct exp_tid_set {
};
typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct tid_queue {
struct list_head queue_head;
/* queue head for QP TID resource waiters */
u32 enqueue; /* count of tid enqueues */
u32 dequeue; /* count of tid dequeues */
};
struct hfi1_ctxtdata {
/* rcvhdrq base, needs mmap before useful */
void *rcvhdrq;
@ -291,6 +299,12 @@ struct hfi1_ctxtdata {
/* PSM Specific fields */
/* lock protecting all Expected TID data */
struct mutex exp_mutex;
/* lock protecting all Expected TID data of kernel contexts */
spinlock_t exp_lock;
/* Queue for QP's waiting for HW TID flows */
struct tid_queue flow_queue;
/* Queue for QP's waiting for HW receive array entries */
struct tid_queue rarr_queue;
/* when waiting for rcv or pioavail */
wait_queue_head_t wait;
/* uuid from PSM */
@ -323,6 +337,9 @@ struct hfi1_ctxtdata {
*/
u8 subctxt_cnt;
/* Bit mask to track free TID RDMA HW flows */
unsigned long flow_mask;
struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
};
/**
@ -2103,7 +2120,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
#endif
HFI1_PKT_USER_SC_INTEGRITY;
else
else if (ctxt_type != SC_KERNEL)
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
/* turn on send-side job key checks if !A0 */

View File

@ -370,6 +370,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
mutex_init(&rcd->exp_mutex);
spin_lock_init(&rcd->exp_lock);
INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
@ -472,6 +475,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
/* Initialize TID flow generations for the context */
hfi1_kern_init_ctxt_generations(rcd);
}
*context = rcd;
@ -771,6 +777,8 @@ static void enable_chip(struct hfi1_devdata *dd)
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
if (HFI1_CAP_IS_KSET(TID_RDMA))
rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
hfi1_rcvctrl(dd, rcvmask, rcd);
sc_enable(rcd->sc);
hfi1_rcd_put(rcd);
@ -1589,7 +1597,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
if (rcd) {
hfi1_clear_tids(rcd);
hfi1_free_ctxt_rcv_groups(rcd);
hfi1_free_ctxt(rcd);
}
}

View File

@ -319,6 +319,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
@ -738,6 +739,7 @@ void flush_qp_waiters(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
hfi1_tid_rdma_flush_wait(qp);
}
void stop_send_queue(struct rvt_qp *qp)
@ -745,6 +747,8 @@ void stop_send_queue(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv;
iowait_cancel_work(&priv->s_iowait);
if (cancel_work_sync(&priv->tid_rdma.trigger_work))
rvt_put_qp(qp);
}
void quiesce_qp(struct rvt_qp *qp)
@ -758,6 +762,7 @@ void quiesce_qp(struct rvt_qp *qp)
void notify_qp_reset(struct rvt_qp *qp)
{
hfi1_qp_kern_exp_rcv_clear_all(qp);
qp->r_adefered = 0;
clear_ahg(qp);

View File

@ -63,11 +63,13 @@ extern const struct rvt_operation_params hfi1_post_parms[];
* HFI1_S_AHG_VALID - ahg header valid on chip
* HFI1_S_AHG_CLEAR - have send engine clear ahg state
* HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
* HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
* HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
*/
#define HFI1_S_AHG_VALID 0x80000000
#define HFI1_S_AHG_CLEAR 0x40000000
#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
#define HFI1_S_WAIT_TID_SPACE 0x10000000
#define HFI1_S_MIN_BIT_MASK 0x01000000
/*

View File

@ -51,28 +51,48 @@
#include "hfi.h"
#include "qp.h"
#include "rc.h"
#include "verbs_txreq.h"
#include "trace.h"
/* cut down ridiculously long IB macro names */
#define OP(x) RC_OP(x)
static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp);
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
u8 *prev_ack, bool *scheduled)
__must_hold(&qp->s_lock)
{
u32 len;
struct rvt_ack_entry *e = NULL;
u8 i, p;
bool s = true;
len = delta_psn(psn, wqe->psn) * pmtu;
ss->sge = wqe->sg_list[0];
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
rvt_skip_sge(ss, len, false);
return wqe->length - len;
for (i = qp->r_head_ack_queue; ; i = p) {
if (i == qp->s_tail_ack_queue)
s = false;
if (i)
p = i - 1;
else
p = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
if (p == qp->r_head_ack_queue) {
e = NULL;
break;
}
e = &qp->s_ack_queue[p];
if (!e->opcode) {
e = NULL;
break;
}
if (cmp_psn(psn, e->psn) >= 0) {
if (p == qp->s_tail_ack_queue &&
cmp_psn(psn, e->lpsn) <= 0)
s = false;
break;
}
}
if (prev)
*prev = p;
if (prev_ack)
*prev_ack = i;
if (scheduled)
*scheduled = s;
return e;
}
/**
@ -92,13 +112,16 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
{
struct rvt_ack_entry *e;
u32 hwords;
u32 len;
u32 bth0, bth2;
u32 len = 0;
u32 bth0 = 0, bth2 = 0;
u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
int middle = 0;
u32 pmtu = qp->pmtu;
struct hfi1_qp_priv *priv = qp->priv;
bool last_pkt;
u32 delta;
trace_hfi1_rsp_make_rc_ack(qp, 0);
lockdep_assert_held(&qp->s_lock);
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
@ -170,6 +193,26 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
hwords++;
qp->s_ack_rdma_psn = e->psn;
bth2 = mask_psn(qp->s_ack_rdma_psn++);
} else if (e->opcode == TID_OP(READ_REQ)) {
/*
* If a TID RDMA read response is being resent and
* we haven't seen the duplicate request yet,
* then stop sending the remaining responses the
* responder has seen until the requester re-sends it.
*/
len = e->rdma_sge.sge_length;
if (len && !e->rdma_sge.mr) {
qp->s_tail_ack_queue = qp->r_head_ack_queue;
goto bail;
}
/* Copy SGE state in case we need to resend */
ps->s_txreq->mr = e->rdma_sge.mr;
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_ack_state = TID_OP(READ_RESP);
goto read_resp;
} else {
/* COMPARE_SWAP or FETCH_ADD */
ps->s_txreq->ss = NULL;
@ -207,6 +250,28 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth2 = mask_psn(qp->s_ack_rdma_psn++);
break;
case TID_OP(READ_RESP):
read_resp:
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, &bth0,
&bth1, &bth2, &len,
&last_pkt);
if (delta == 0)
goto error_qp;
hwords += delta;
if (last_pkt) {
e->sent = 1;
/*
* Increment qp->s_tail_ack_queue through s_ack_state
* transition.
*/
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
}
break;
case TID_OP(READ_REQ):
goto bail;
default:
normal:
/*
@ -236,7 +301,14 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
ps->s_txreq->hdr_dwords = hwords;
hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
return 1;
error_qp:
spin_unlock_irqrestore(&qp->s_lock, ps->flags);
spin_lock_irqsave(&qp->r_lock, ps->flags);
spin_lock(&qp->s_lock);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
spin_unlock(&qp->s_lock);
spin_unlock_irqrestore(&qp->r_lock, ps->flags);
spin_lock_irqsave(&qp->s_lock, ps->flags);
bail:
qp->s_ack_state = OP(ACKNOWLEDGE);
/*
@ -263,17 +335,22 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_sge_state *ss = NULL;
struct rvt_swqe *wqe;
u32 hwords;
u32 len;
u32 bth0 = 0, bth2;
struct hfi1_swqe_priv *wpriv;
struct tid_rdma_request *req = NULL;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
u32 hwords = 5;
u32 len = 0;
u32 bth0 = 0, bth2 = 0;
u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
u32 pmtu = qp->pmtu;
char newreq;
int middle = 0;
int delta;
struct tid_rdma_flow *flow = NULL;
trace_hfi1_sender_make_rc_req(qp);
lockdep_assert_held(&qp->s_lock);
ps->s_txreq = get_txreq(ps->dev, qp);
if (!ps->s_txreq)
@ -314,8 +391,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done_free_tx;
}
@ -334,6 +411,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* Send a request. */
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
check_s_state:
switch (qp->s_state) {
default:
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
@ -355,9 +433,13 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/*
* If a fence is requested, wait for previous
* RDMA read and atomic operations to finish.
* However, there is no need to guard against
* TID RDMA READ after TID RDMA READ.
*/
if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
qp->s_num_rd_atomic) {
qp->s_num_rd_atomic &&
(wqe->wr.opcode != IB_WR_TID_RDMA_READ ||
priv->pending_tid_r_segs < qp->s_num_rd_atomic)) {
qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
@ -402,6 +484,15 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
len = wqe->length;
ss = &qp->s_sge;
bth2 = mask_psn(qp->s_psn);
/*
* Interlock between various IB requests and TID RDMA
* if necessary.
*/
if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
hfi1_tid_rdma_wqe_interlock(qp, wqe))
goto bail;
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
@ -483,16 +574,14 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
* Don't allow more operations to be started
* than the QP limits allow.
*/
if (newreq) {
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr,
&ohdr->u.rc.reth);
@ -508,20 +597,92 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_cur = 0;
break;
case IB_WR_TID_RDMA_READ:
trace_hfi1_tid_read_sender_make_req(qp, newreq);
wpriv = wqe->priv;
req = wqe_to_tid_req(wqe);
trace_hfi1_tid_req_make_req_read(qp, newreq,
wqe->wr.opcode,
wqe->psn, wqe->lpsn,
req);
delta = cmp_psn(qp->s_psn, wqe->psn);
/*
* Don't allow more operations to be started
* than the QP limits allow. We could get here under
* three conditions; (1) It's a new request; (2) We are
* sending the second or later segment of a request,
* but the qp->s_state is set to OP(RDMA_READ_REQUEST)
* when the last segment of a previous request is
* received just before this; (3) We are re-sending a
* request.
*/
if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
if (newreq) {
struct tid_rdma_flow *flow =
&req->flows[req->setup_head];
/*
* Set up s_sge as it is needed for TID
* allocation. However, if the pages have been
* walked and mapped, skip it. An earlier try
* has failed to allocate the TID entries.
*/
if (!flow->npagesets) {
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
qp->s_len = wqe->length;
req->isge = 0;
req->clear_tail = req->setup_head;
req->flow_idx = req->setup_head;
req->state = TID_REQUEST_ACTIVE;
}
} else if (delta == 0) {
/* Re-send a request */
req->cur_seg = 0;
req->comp_seg = 0;
req->ack_pending = 0;
req->flow_idx = req->clear_tail;
req->state = TID_REQUEST_RESEND;
}
req->s_next_psn = qp->s_psn;
/* Read one segment at a time */
len = min_t(u32, req->seg_len,
wqe->length - req->seg_len * req->cur_seg);
delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr,
&bth1, &bth2,
&len);
if (delta <= 0) {
/* Wait for TID space */
goto bail;
}
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
hwords += delta;
ss = &wpriv->ss;
/* Check if this is the last segment */
if (req->cur_seg >= req->total_segs &&
++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
/*
* Don't allow more operations to be started
* than the QP limits allow.
*/
if (newreq) {
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
if (qp->s_num_rd_atomic >=
qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
qp->s_num_rd_atomic++;
/* FALLTHROUGH */
case IB_WR_OPFN:
@ -555,11 +716,13 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
default:
goto bail;
}
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
qp->s_len = wqe->length;
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) {
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
qp->s_len = wqe->length;
}
if (newreq) {
qp->s_tail++;
if (qp->s_tail >= qp->s_size)
@ -567,6 +730,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_psn = wqe->lpsn + 1;
else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
qp->s_psn = req->s_next_psn;
else
qp->s_psn++;
break;
@ -683,6 +848,103 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case TID_OP(READ_RESP):
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
goto bail;
/* This is used to restart a TID read request */
req = wqe_to_tid_req(wqe);
wpriv = wqe->priv;
/*
* Back down. The field qp->s_psn has been set to the psn with
* which the request should be restart. It's OK to use division
* as this is on the retry path.
*/
req->cur_seg = delta_psn(qp->s_psn, wqe->psn) / priv->pkts_ps;
/*
* The following function need to be redefined to return the
* status to make sure that we find the flow. At the same
* time, we can use the req->state change to check if the
* call succeeds or not.
*/
req->state = TID_REQUEST_RESEND;
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
if (req->state != TID_REQUEST_ACTIVE) {
/*
* Failed to find the flow. Release all allocated tid
* resources.
*/
hfi1_kern_exp_rcv_clear_all(req);
hfi1_kern_clear_hw_flow(priv->rcd, qp);
hfi1_trdma_send_complete(qp, wqe, IB_WC_LOC_QP_OP_ERR);
goto bail;
}
req->state = TID_REQUEST_RESEND;
len = min_t(u32, req->seg_len,
wqe->length - req->seg_len * req->cur_seg);
flow = &req->flows[req->flow_idx];
len -= flow->sent;
req->s_next_psn = flow->flow_state.ib_lpsn + 1;
delta = hfi1_build_tid_rdma_read_packet(wqe, ohdr, &bth1,
&bth2, &len);
if (delta <= 0) {
/* Wait for TID space */
goto bail;
}
hwords += delta;
ss = &wpriv->ss;
/* Check if this is the last segment */
if (req->cur_seg >= req->total_segs &&
++qp->s_cur == qp->s_size)
qp->s_cur = 0;
qp->s_psn = req->s_next_psn;
trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
wqe->psn, wqe->lpsn, req);
break;
case TID_OP(READ_REQ):
req = wqe_to_tid_req(wqe);
delta = cmp_psn(qp->s_psn, wqe->psn);
/*
* If the current WR is not TID RDMA READ, or this is the start
* of a new request, we need to change the qp->s_state so that
* the request can be set up properly.
*/
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ || delta == 0 ||
qp->s_cur == qp->s_tail) {
qp->s_state = OP(RDMA_READ_REQUEST);
if (delta == 0 || qp->s_cur == qp->s_tail)
goto check_s_state;
else
goto bail;
}
/* Rate limiting */
if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
qp->s_flags |= RVT_S_WAIT_RDMAR;
goto bail;
}
wpriv = wqe->priv;
/* Read one segment at a time */
len = min_t(u32, req->seg_len,
wqe->length - req->seg_len * req->cur_seg);
delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr, &bth1,
&bth2, &len);
if (delta <= 0) {
/* Wait for TID space */
goto bail;
}
hwords += delta;
ss = &wpriv->ss;
/* Check if this is the last segment */
if (req->cur_seg >= req->total_segs &&
++qp->s_cur == qp->s_size)
qp->s_cur = 0;
qp->s_psn = req->s_next_psn;
trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
wqe->psn, wqe->lpsn, req);
break;
}
qp->s_sending_hpsn = bth2;
delta = delta_psn(bth2, wqe->psn);
@ -950,6 +1212,43 @@ void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
return;
}
/**
* update_num_rd_atomic - update the qp->s_num_rd_atomic
* @qp: the QP
* @psn: the packet sequence number to restart at
* @wqe: the wqe
*
* This is called from reset_psn() to update qp->s_num_rd_atomic
* for the current wqe.
* Called at interrupt level with the QP s_lock held.
*/
static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
struct rvt_swqe *wqe)
{
u32 opcode = wqe->wr.opcode;
if (opcode == IB_WR_RDMA_READ ||
opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
qp->s_num_rd_atomic++;
} else if (opcode == IB_WR_TID_RDMA_READ) {
struct tid_rdma_request *req = wqe_to_tid_req(wqe);
struct hfi1_qp_priv *priv = qp->priv;
if (cmp_psn(psn, wqe->lpsn) <= 0) {
u32 cur_seg;
cur_seg = (psn - wqe->psn) / priv->pkts_ps;
req->ack_pending = cur_seg - req->comp_seg;
priv->pending_tid_r_segs += req->ack_pending;
qp->s_num_rd_atomic += req->ack_pending;
} else {
priv->pending_tid_r_segs += req->total_segs;
qp->s_num_rd_atomic += req->total_segs;
}
}
}
/**
* reset_psn - reset the QP state to send starting from PSN
* @qp: the QP
@ -964,9 +1263,12 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
u32 n = qp->s_acked;
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
struct hfi1_qp_priv *priv = qp->priv;
lockdep_assert_held(&qp->s_lock);
qp->s_cur = n;
priv->pending_tid_r_segs = 0;
qp->s_num_rd_atomic = 0;
/*
* If we are starting the request from the beginning,
@ -976,9 +1278,9 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(SEND_LAST);
goto done;
}
update_num_rd_atomic(qp, psn, wqe);
/* Find the work request opcode corresponding to the given PSN. */
opcode = wqe->wr.opcode;
for (;;) {
int diff;
@ -988,8 +1290,11 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
break;
wqe = rvt_get_swqe_ptr(qp, n);
diff = cmp_psn(psn, wqe->psn);
if (diff < 0)
if (diff < 0) {
/* Point wqe back to the previous one*/
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
break;
}
qp->s_cur = n;
/*
* If we are starting the request from the beginning,
@ -999,8 +1304,10 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(SEND_LAST);
goto done;
}
opcode = wqe->wr.opcode;
update_num_rd_atomic(qp, psn, wqe);
}
opcode = wqe->wr.opcode;
/*
* Set the state to restart in the middle of a request.
@ -1022,6 +1329,10 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
break;
case IB_WR_TID_RDMA_READ:
qp->s_state = TID_OP(READ_RESP);
break;
default:
/*
* This case shouldn't happen since its only
@ -1030,6 +1341,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(SEND_LAST);
}
done:
priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
qp->s_psn = psn;
/*
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
@ -1040,6 +1352,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
(cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
qp->s_flags |= RVT_S_WAIT_PSN;
qp->s_flags &= ~HFI1_S_AHG_VALID;
trace_hfi1_sender_reset_psn(qp);
}
/*
@ -1054,6 +1367,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
lockdep_assert_held(&qp->r_lock);
lockdep_assert_held(&qp->s_lock);
trace_hfi1_sender_restart_rc(qp);
if (qp->s_retry == 0) {
if (qp->s_mig_state == IB_MIG_ARMED) {
hfi1_migrate_qp(qp);
@ -1075,8 +1389,16 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
wqe = do_rc_completion(qp, wqe, ibp);
qp->s_flags &= ~RVT_S_WAIT_ACK;
} else {
rvt_send_complete(qp, wqe,
IB_WC_RETRY_EXC_ERR);
if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
struct tid_rdma_request *req;
req = wqe_to_tid_req(wqe);
hfi1_kern_exp_rcv_clear_all(req);
hfi1_kern_clear_hw_flow(priv->rcd, qp);
}
hfi1_trdma_send_complete(qp, wqe,
IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
return;
@ -1088,7 +1410,8 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
}
ibp = to_iport(qp->ibqp.device, qp->port_num);
if (wqe->wr.opcode == IB_WR_RDMA_READ)
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_READ)
ibp->rvp.n_rc_resends++;
else
ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
@ -1115,7 +1438,8 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
for (;;) {
wqe = rvt_get_swqe_ptr(qp, n);
if (cmp_psn(psn, wqe->lpsn) <= 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ)
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_READ)
qp->s_sending_psn = wqe->lpsn + 1;
else
qp->s_sending_psn = psn + 1;
@ -1164,8 +1488,9 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
}
opcode = ib_bth_get_opcode(ohdr);
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
opcode == TID_OP(READ_RESP)) {
WARN_ON(!qp->s_rdma_ack_cnt);
qp->s_rdma_ack_cnt--;
return;
@ -1181,8 +1506,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
!(qp->s_flags &
(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
rvt_add_retry_timer(qp);
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
if (opcode == TID_OP(READ_REQ))
rvt_add_retry_timer_ext(qp, priv->timeout_shift);
else
rvt_add_retry_timer(qp);
}
while (qp->s_last != qp->s_acked) {
u32 s_last;
@ -1191,6 +1520,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
trdma_clean_swqe(qp, wqe);
rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
@ -1229,20 +1559,24 @@ static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
* This is similar to hfi1_send_complete but has to check to be sure
* that the SGEs are not being referenced if the SWQE is being resent.
*/
static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp)
struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp)
{
struct hfi1_qp_priv *priv = qp->priv;
lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
* is finished.
*/
trace_hfi1_rc_completion(qp, wqe->lpsn);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
trdma_clean_swqe(qp, wqe);
rvt_put_swqe(wqe);
rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
@ -1300,6 +1634,10 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qp->s_draining = 0;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
}
if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
hfi1_schedule_send(qp);
}
return wqe;
}
@ -1314,11 +1652,12 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
* May be called at interrupt level, with the QP s_lock held.
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
*/
static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u64 val, struct hfi1_ctxtdata *rcd)
int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u64 val, struct hfi1_ctxtdata *rcd)
{
struct hfi1_ibport *ibp;
enum ib_wc_status status;
struct hfi1_qp_priv *qpriv = qp->priv;
struct rvt_swqe *wqe;
int ret = 0;
u32 ack_psn;
@ -1365,6 +1704,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
*/
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
(opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
(wqe->wr.opcode == IB_WR_TID_RDMA_READ &&
(opcode != TID_OP(READ_RESP) || diff != 0)) ||
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
(opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
@ -1415,10 +1756,18 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
break;
}
trace_hfi1_rc_ack_do(qp, aeth, psn, wqe);
trace_hfi1_sender_do_rc_ack(qp);
switch (aeth >> IB_AETH_NAK_SHIFT) {
case 0: /* ACK */
this_cpu_inc(*ibp->rvp.rc_acks);
if (qp->s_acked != qp->s_tail) {
if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
if (wqe_to_tid_req(wqe)->ack_pending)
rvt_mod_retry_timer_ext(qp,
qpriv->timeout_shift);
else
rvt_stop_rc_timers(qp);
} else if (qp->s_acked != qp->s_tail) {
/*
* We are expecting more ACKs so
* mod the retry timer.
@ -1507,7 +1856,10 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
rvt_send_complete(qp, wqe, status);
if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
hfi1_kern_read_tid_flow_free(qp);
hfi1_trdma_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@ -1548,6 +1900,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
while (cmp_psn(psn, wqe->lpsn) > 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
break;
@ -1754,16 +2107,6 @@ static void rc_rcv_resp(struct hfi1_packet *packet)
return;
}
static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
struct rvt_qp *qp)
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
static inline void rc_cancel_ack(struct rvt_qp *qp)
{
qp->r_adefered = 0;
@ -1796,8 +2139,9 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_ack_entry *e;
unsigned long flags;
u8 i, prev;
int old_req;
u8 prev;
u8 mra; /* most recent ACK */
bool old_req;
trace_hfi1_rcv_error(qp, psn);
if (diff > 0) {
@ -1843,29 +2187,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
spin_lock_irqsave(&qp->s_lock, flags);
for (i = qp->r_head_ack_queue; ; i = prev) {
if (i == qp->s_tail_ack_queue)
old_req = 0;
if (i)
prev = i - 1;
else
prev = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
if (prev == qp->r_head_ack_queue) {
e = NULL;
break;
}
e = &qp->s_ack_queue[prev];
if (!e->opcode) {
e = NULL;
break;
}
if (cmp_psn(psn, e->psn) >= 0) {
if (prev == qp->s_tail_ack_queue &&
cmp_psn(psn, e->lpsn) <= 0)
old_req = 0;
break;
}
}
e = find_prev_entry(qp, psn, &prev, &mra, &old_req);
switch (opcode) {
case OP(RDMA_READ_REQUEST): {
struct ib_reth *reth;
@ -1940,7 +2263,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* Resend the most recent ACK if this request is
* after all the previous RDMA reads and atomics.
*/
if (i == qp->r_head_ack_queue) {
if (mra == qp->r_head_ack_queue) {
spin_unlock_irqrestore(&qp->s_lock, flags);
qp->r_nak_state = 0;
qp->r_ack_psn = qp->r_psn - 1;
@ -1951,7 +2274,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* Resend the RDMA read or atomic op which
* ACKs this duplicate request.
*/
qp->s_tail_ack_queue = i;
qp->s_tail_ack_queue = mra;
break;
}
qp->s_ack_state = OP(ACKNOWLEDGE);
@ -1968,17 +2291,6 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
return 0;
}
static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
{
unsigned next;
next = n + 1;
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
qp->s_tail_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
u32 lqpn, u32 rqpn, u8 svc_type)
{

View File

@ -0,0 +1,50 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef HFI1_RC_H
#define HFI1_RC_H
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
{
unsigned int next;
next = n + 1;
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
qp->s_tail_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
struct rvt_qp *qp)
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
u32 len;
len = delta_psn(psn, wqe->psn) * pmtu;
return rvt_restart_sge(ss, wqe, len);
}
struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
u8 *prev_ack, bool *scheduled);
int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
struct hfi1_ctxtdata *rcd);
struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct hfi1_ibport *ibp);
#endif /* HFI1_RC_H */

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,27 @@
#ifndef HFI1_TID_RDMA_H
#define HFI1_TID_RDMA_H
#include <linux/circ_buf.h>
#include "common.h"
/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
/*
* Bit definitions for priv->s_flags.
* These bit flags overload the bit flags defined for the QP's s_flags.
* Due to the fact that these bit fields are used only for the QP priv
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
*/
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
struct tid_rdma_params {
struct rcu_head rcu_head;
@ -21,10 +41,128 @@ struct tid_rdma_params {
};
struct tid_rdma_qp_params {
struct work_struct trigger_work;
struct tid_rdma_params local;
struct tid_rdma_params __rcu *remote;
};
/* Track state for each hardware flow */
struct tid_flow_state {
u32 generation;
u32 psn;
u32 r_next_psn; /* next PSN to be received (in TID space) */
u8 index;
u8 last_index;
u8 flags;
};
enum tid_rdma_req_state {
TID_REQUEST_INACTIVE = 0,
TID_REQUEST_INIT,
TID_REQUEST_INIT_RESEND,
TID_REQUEST_ACTIVE,
TID_REQUEST_RESEND,
TID_REQUEST_RESEND_ACTIVE,
TID_REQUEST_QUEUED,
TID_REQUEST_SYNC,
TID_REQUEST_RNR_NAK,
TID_REQUEST_COMPLETE,
};
struct tid_rdma_request {
struct rvt_qp *qp;
struct hfi1_ctxtdata *rcd;
union {
struct rvt_swqe *swqe;
struct rvt_ack_entry *ack;
} e;
struct tid_rdma_flow *flows; /* array of tid flows */
u16 n_flows; /* size of the flow buffer window */
u16 setup_head; /* flow index we are setting up */
u16 clear_tail; /* flow index we are clearing */
u16 flow_idx; /* flow index most recently set up */
u32 seg_len;
u32 total_len;
u32 r_flow_psn; /* IB PSN of next segment start */
u32 s_next_psn; /* IB PSN of next segment start for read */
u32 total_segs; /* segments required to complete a request */
u32 cur_seg; /* index of current segment */
u32 comp_seg; /* index of last completed segment */
u32 ack_seg; /* index of last ack'ed segment */
u32 isge; /* index of "current" sge */
u32 ack_pending; /* num acks pending for this request */
enum tid_rdma_req_state state;
};
/*
* When header suppression is used, PSNs associated with a "flow" are
* relevant (and not the PSNs maintained by verbs). Track per-flow
* PSNs here for a TID RDMA segment.
*
*/
struct flow_state {
u32 flags;
u32 resp_ib_psn; /* The IB PSN of the response for this flow */
u32 generation; /* generation of flow */
u32 spsn; /* starting PSN in TID space */
u32 lpsn; /* last PSN in TID space */
u32 r_next_psn; /* next PSN to be received (in TID space) */
/* For tid rdma read */
u32 ib_spsn; /* starting PSN in Verbs space */
u32 ib_lpsn; /* last PSn in Verbs space */
};
struct tid_rdma_pageset {
dma_addr_t addr : 48; /* Only needed for the first page */
u8 idx: 8;
u8 count : 7;
u8 mapped: 1;
};
/**
* kern_tid_node - used for managing TID's in TID groups
*
* @grp_idx: rcd relative index to tid_group
* @map: grp->map captured prior to programming this TID group in HW
* @cnt: Only @cnt of available group entries are actually programmed
*/
struct kern_tid_node {
struct tid_group *grp;
u8 map;
u8 cnt;
};
/* Overall info for a TID RDMA segment */
struct tid_rdma_flow {
/*
* While a TID RDMA segment is being transferred, it uses a QP number
* from the "KDETH section of QP numbers" (which is different from the
* QP number that originated the request). Bits 11-15 of these QP
* numbers identify the "TID flow" for the segment.
*/
struct flow_state flow_state;
struct tid_rdma_request *req;
u32 tid_qpn;
u32 tid_offset;
u32 length;
u32 sent;
u8 tnode_cnt;
u8 tidcnt;
u8 tid_idx;
u8 idx;
u8 npagesets;
u8 npkts;
u8 pkt;
struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
u32 tid_entry[TID_RDMA_MAX_PAGES];
};
bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
@ -32,9 +170,67 @@ void tid_rdma_conn_error(struct rvt_qp *qp);
void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
struct rvt_sge_state *ss, bool *last);
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
/**
* trdma_clean_swqe - clean flows for swqe if large send queue
* @qp: the qp
* @wqe: the send wqe
*/
static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
if (!wqe->priv)
return;
__trdma_clean_swqe(qp, wqe);
}
void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp);
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
struct ib_qp_init_attr *init_attr);
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);
int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);
struct cntr_entry;
u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data);
u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, u32 *bth1,
u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u32 *bth0,
u32 *bth1, u32 *bth2, u32 *len, bool *last);
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
struct hfi1_pportdata *ppd,
struct hfi1_packet *packet);
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
u32 *bth2);
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
struct rvt_swqe *wqe)
{
if (wqe->priv &&
wqe->wr.opcode == IB_WR_RDMA_READ &&
wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
setup_tid_rdma_wqe(qp, wqe);
}
#endif /* HFI1_TID_RDMA_H */

View File

@ -46,6 +46,7 @@
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "exp_rcv.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
@ -128,6 +129,10 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define TID_RDMA_KDETH "kdeth0 0x%x kdeth1 0x%x"
#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_READ_RSP_PRN "verbs_qp 0x%x"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@ -322,6 +327,38 @@ const char *parse_everbs_hdrs(
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
be32_to_cpu(eh->aeth) & IB_MSN_MASK);
break;
case OP(TID_RDMA, READ_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_READ_REQ_PRN,
le32_to_cpu(eh->tid_rdma.r_req.kdeth0),
le32_to_cpu(eh->tid_rdma.r_req.kdeth1),
ib_u64_get(&eh->tid_rdma.r_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.r_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.r_req.reth.length),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.r_req.verbs_qp));
break;
case OP(TID_RDMA, READ_RESP):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " AETH_PRN " "
TID_READ_RSP_PRN,
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth0),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, SH),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TID),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth1),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.r_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.r_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.r_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
@ -394,6 +431,21 @@ const char *print_u32_array(
return ret;
}
u8 hfi1_trace_get_tid_ctrl(u32 ent)
{
return EXP_TID_GET(ent, CTRL);
}
u16 hfi1_trace_get_tid_len(u32 ent)
{
return EXP_TID_GET(ent, LEN);
}
u16 hfi1_trace_get_tid_idx(u32 ent)
{
return EXP_TID_GET(ent, IDX);
}
__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);

View File

@ -79,6 +79,8 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
ib_opcode_name(TID_RDMA_READ_REQ), \
ib_opcode_name(TID_RDMA_READ_RESP), \
ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \

View File

@ -109,6 +109,54 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_rc_template, hfi1_rc_completion,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DECLARE_EVENT_CLASS(/* rc_ack */
hfi1_rc_ack_template,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u32, aeth)
__field(u32, psn)
__field(u8, opcode)
__field(u32, spsn)
__field(u32, lpsn)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->aeth = aeth;
__entry->psn = psn;
__entry->opcode = wqe->wr.opcode;
__entry->spsn = wqe->psn;
__entry->lpsn = wqe->lpsn;
),
TP_printk(/* print */
"[%s] qpn 0x%x aeth 0x%x psn 0x%x opcode 0x%x spsn 0x%x lpsn 0x%x",
__get_str(dev),
__entry->qpn,
__entry->aeth,
__entry->psn,
__entry->opcode,
__entry->spsn,
__entry->lpsn
)
);
DEFINE_EVENT(/* do_rc_ack */
hfi1_rc_ack_template, hfi1_rc_ack_do,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe)
);
#endif /* __HFI1_TRACE_RC_H */
#undef TRACE_INCLUDE_PATH

View File

@ -21,10 +21,51 @@ __print_symbolic(type, \
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_tid
u8 hfi1_trace_get_tid_ctrl(u32 ent);
u16 hfi1_trace_get_tid_len(u32 ent);
u16 hfi1_trace_get_tid_idx(u32 ent);
#define OPFN_PARAM_PRN "[%s] qpn 0x%x %s OPFN: qp 0x%x, max read %u, " \
"max write %u, max length %u, jkey 0x%x timeout %u " \
"urg %u"
#define TID_FLOW_PRN "[%s] qpn 0x%x flow %d: idx %d resp_ib_psn 0x%x " \
"generation 0x%x fpsn 0x%x-%x r_next_psn 0x%x " \
"ib_psn 0x%x-%x npagesets %u tnode_cnt %u " \
"tidcnt %u tid_idx %u tid_offset %u length %u sent %u"
#define TID_NODE_PRN "[%s] qpn 0x%x %s idx %u grp base 0x%x map 0x%x " \
"used %u cnt %u"
#define RSP_INFO_PRN "[%s] qpn 0x%x state 0x%x s_state 0x%x psn 0x%x " \
"r_psn 0x%x r_state 0x%x r_flags 0x%x " \
"r_head_ack_queue %u s_tail_ack_queue %u " \
"s_ack_state 0x%x " \
"s_nak_state 0x%x s_flags 0x%x ps_flags 0x%x " \
"iow_flags 0x%lx"
#define SENDER_INFO_PRN "[%s] qpn 0x%x state 0x%x s_cur %u s_tail %u " \
"s_head %u s_acked %u s_last %u s_psn 0x%x " \
"s_last_psn 0x%x s_flags 0x%x ps_flags 0x%x " \
"iow_flags 0x%lx s_state 0x%x s_num_rd %u s_retry %u"
#define TID_READ_SENDER_PRN "[%s] qpn 0x%x newreq %u tid_r_reqs %u " \
"tid_r_comp %u pending_tid_r_segs %u " \
"s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
"hw_flow_index %u generation 0x%x " \
"fpsn 0x%x flow_flags 0x%x"
#define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \
"cur_seg %u comp_seg %u ack_seg %u " \
"total_segs %u setup_head %u clear_tail %u flow_idx %u " \
"state %u r_flow_psn 0x%x " \
"s_next_psn 0x%x"
#define RCV_ERR_PRN "[%s] qpn 0x%x s_flags 0x%x state 0x%x " \
"s_tail_ack_queue %u " \
"r_head_ack_queue %u opcode 0x%x psn 0x%x r_psn 0x%x " \
" diff %d"
DECLARE_EVENT_CLASS(/* class */
hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
@ -323,6 +364,723 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, msg, more)
);
DEFINE_EVENT(/* event */
hfi1_msg_template, hfi1_msg_alloc_tids,
TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
TP_ARGS(qp, msg, more)
);
DEFINE_EVENT(/* event */
hfi1_msg_template, hfi1_msg_tid_restart_req,
TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
TP_ARGS(qp, msg, more)
);
DEFINE_EVENT(/* event */
hfi1_msg_template, hfi1_msg_handle_kdeth_eflags,
TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
TP_ARGS(qp, msg, more)
);
DECLARE_EVENT_CLASS(/* tid_flow_page */
hfi1_tid_flow_page_template,
TP_PROTO(struct rvt_qp *qp, struct tid_rdma_flow *flow, u32 index,
char mtu8k, char v1, void *vaddr),
TP_ARGS(qp, flow, index, mtu8k, v1, vaddr),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(char, mtu8k)
__field(char, v1)
__field(u32, index)
__field(u64, page)
__field(u64, vaddr)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->mtu8k = mtu8k;
__entry->v1 = v1;
__entry->index = index;
__entry->page = vaddr ? (u64)virt_to_page(vaddr) : 0ULL;
__entry->vaddr = (u64)vaddr;
),
TP_printk(/* print */
"[%s] qpn 0x%x page[%u]: page 0x%llx %s 0x%llx",
__get_str(dev),
__entry->qpn,
__entry->index,
__entry->page,
__entry->mtu8k ? (__entry->v1 ? "v1" : "v0") : "vaddr",
__entry->vaddr
)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_page_template, hfi1_tid_flow_page,
TP_PROTO(struct rvt_qp *qp, struct tid_rdma_flow *flow, u32 index,
char mtu8k, char v1, void *vaddr),
TP_ARGS(qp, flow, index, mtu8k, v1, vaddr)
);
DECLARE_EVENT_CLASS(/* tid_pageset */
hfi1_tid_pageset_template,
TP_PROTO(struct rvt_qp *qp, u32 index, u16 idx, u16 count),
TP_ARGS(qp, index, idx, count),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u32, index)
__field(u16, idx)
__field(u16, count)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->index = index;
__entry->idx = idx;
__entry->count = count;
),
TP_printk(/* print */
"[%s] qpn 0x%x list[%u]: idx %u count %u",
__get_str(dev),
__entry->qpn,
__entry->index,
__entry->idx,
__entry->count
)
);
DEFINE_EVENT(/* event */
hfi1_tid_pageset_template, hfi1_tid_pageset,
TP_PROTO(struct rvt_qp *qp, u32 index, u16 idx, u16 count),
TP_ARGS(qp, index, idx, count)
);
DECLARE_EVENT_CLASS(/* tid_fow */
hfi1_tid_flow_template,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(int, index)
__field(int, idx)
__field(u32, resp_ib_psn)
__field(u32, generation)
__field(u32, fspsn)
__field(u32, flpsn)
__field(u32, r_next_psn)
__field(u32, ib_spsn)
__field(u32, ib_lpsn)
__field(u32, npagesets)
__field(u32, tnode_cnt)
__field(u32, tidcnt)
__field(u32, tid_idx)
__field(u32, tid_offset)
__field(u32, length)
__field(u32, sent)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->index = index;
__entry->idx = flow->idx;
__entry->resp_ib_psn = flow->flow_state.resp_ib_psn;
__entry->generation = flow->flow_state.generation;
__entry->fspsn = full_flow_psn(flow,
flow->flow_state.spsn);
__entry->flpsn = full_flow_psn(flow,
flow->flow_state.lpsn);
__entry->r_next_psn = flow->flow_state.r_next_psn;
__entry->ib_spsn = flow->flow_state.ib_spsn;
__entry->ib_lpsn = flow->flow_state.ib_lpsn;
__entry->npagesets = flow->npagesets;
__entry->tnode_cnt = flow->tnode_cnt;
__entry->tidcnt = flow->tidcnt;
__entry->tid_idx = flow->tid_idx;
__entry->tid_offset = flow->tid_offset;
__entry->length = flow->length;
__entry->sent = flow->sent;
),
TP_printk(/* print */
TID_FLOW_PRN,
__get_str(dev),
__entry->qpn,
__entry->index,
__entry->idx,
__entry->resp_ib_psn,
__entry->generation,
__entry->fspsn,
__entry->flpsn,
__entry->r_next_psn,
__entry->ib_spsn,
__entry->ib_lpsn,
__entry->npagesets,
__entry->tnode_cnt,
__entry->tidcnt,
__entry->tid_idx,
__entry->tid_offset,
__entry->length,
__entry->sent
)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_alloc,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_build_read_pkt,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_build_read_resp,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_rcv_read_req,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_rcv_read_resp,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_restart_req,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DECLARE_EVENT_CLASS(/* tid_node */
hfi1_tid_node_template,
TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
u8 map, u8 used, u8 cnt),
TP_ARGS(qp, msg, index, base, map, used, cnt),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__string(msg, msg)
__field(u32, index)
__field(u32, base)
__field(u8, map)
__field(u8, used)
__field(u8, cnt)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__assign_str(msg, msg);
__entry->index = index;
__entry->base = base;
__entry->map = map;
__entry->used = used;
__entry->cnt = cnt;
),
TP_printk(/* print */
TID_NODE_PRN,
__get_str(dev),
__entry->qpn,
__get_str(msg),
__entry->index,
__entry->base,
__entry->map,
__entry->used,
__entry->cnt
)
);
DEFINE_EVENT(/* event */
hfi1_tid_node_template, hfi1_tid_node_add,
TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
u8 map, u8 used, u8 cnt),
TP_ARGS(qp, msg, index, base, map, used, cnt)
);
DECLARE_EVENT_CLASS(/* tid_entry */
hfi1_tid_entry_template,
TP_PROTO(struct rvt_qp *qp, int index, u32 ent),
TP_ARGS(qp, index, ent),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(int, index)
__field(u8, ctrl)
__field(u16, idx)
__field(u16, len)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->index = index;
__entry->ctrl = hfi1_trace_get_tid_ctrl(ent);
__entry->idx = hfi1_trace_get_tid_idx(ent);
__entry->len = hfi1_trace_get_tid_len(ent);
),
TP_printk(/* print */
"[%s] qpn 0x%x TID entry %d: idx %u len %u ctrl 0x%x",
__get_str(dev),
__entry->qpn,
__entry->index,
__entry->idx,
__entry->len,
__entry->ctrl
)
);
DEFINE_EVENT(/* event */
hfi1_tid_entry_template, hfi1_tid_entry_alloc,
TP_PROTO(struct rvt_qp *qp, int index, u32 entry),
TP_ARGS(qp, index, entry)
);
DEFINE_EVENT(/* event */
hfi1_tid_entry_template, hfi1_tid_entry_build_read_resp,
TP_PROTO(struct rvt_qp *qp, int index, u32 ent),
TP_ARGS(qp, index, ent)
);
DEFINE_EVENT(/* event */
hfi1_tid_entry_template, hfi1_tid_entry_rcv_read_req,
TP_PROTO(struct rvt_qp *qp, int index, u32 ent),
TP_ARGS(qp, index, ent)
);
DECLARE_EVENT_CLASS(/* rsp_info */
hfi1_responder_info_template,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u8, state)
__field(u8, s_state)
__field(u32, psn)
__field(u32, r_psn)
__field(u8, r_state)
__field(u8, r_flags)
__field(u8, r_head_ack_queue)
__field(u8, s_tail_ack_queue)
__field(u8, s_ack_state)
__field(u8, s_nak_state)
__field(u8, r_nak_state)
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
),
TP_fast_assign(/* assign */
struct hfi1_qp_priv *priv = qp->priv;
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->state = qp->state;
__entry->s_state = qp->s_state;
__entry->psn = psn;
__entry->r_psn = qp->r_psn;
__entry->r_state = qp->r_state;
__entry->r_flags = qp->r_flags;
__entry->r_head_ack_queue = qp->r_head_ack_queue;
__entry->s_tail_ack_queue = qp->s_tail_ack_queue;
__entry->s_ack_state = qp->s_ack_state;
__entry->s_nak_state = qp->s_nak_state;
__entry->s_flags = qp->s_flags;
__entry->ps_flags = priv->s_flags;
__entry->iow_flags = priv->s_iowait.flags;
),
TP_printk(/* print */
RSP_INFO_PRN,
__get_str(dev),
__entry->qpn,
__entry->state,
__entry->s_state,
__entry->psn,
__entry->r_psn,
__entry->r_state,
__entry->r_flags,
__entry->r_head_ack_queue,
__entry->s_tail_ack_queue,
__entry->s_ack_state,
__entry->s_nak_state,
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags
)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_make_rc_ack,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_rcv_tid_read_req,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_tid_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DECLARE_EVENT_CLASS(/* sender_info */
hfi1_sender_info_template,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u8, state)
__field(u32, s_cur)
__field(u32, s_tail)
__field(u32, s_head)
__field(u32, s_acked)
__field(u32, s_last)
__field(u32, s_psn)
__field(u32, s_last_psn)
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
__field(u8, s_state)
__field(u8, s_num_rd)
__field(u8, s_retry)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->state = qp->state;
__entry->s_cur = qp->s_cur;
__entry->s_tail = qp->s_tail;
__entry->s_head = qp->s_head;
__entry->s_acked = qp->s_acked;
__entry->s_last = qp->s_last;
__entry->s_psn = qp->s_psn;
__entry->s_last_psn = qp->s_last_psn;
__entry->s_flags = qp->s_flags;
__entry->ps_flags = ((struct hfi1_qp_priv *)qp->priv)->s_flags;
__entry->iow_flags =
((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
__entry->s_state = qp->s_state;
__entry->s_num_rd = qp->s_num_rd_atomic;
__entry->s_retry = qp->s_retry;
),
TP_printk(/* print */
SENDER_INFO_PRN,
__get_str(dev),
__entry->qpn,
__entry->state,
__entry->s_cur,
__entry->s_tail,
__entry->s_head,
__entry->s_acked,
__entry->s_last,
__entry->s_psn,
__entry->s_last_psn,
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags,
__entry->s_state,
__entry->s_num_rd,
__entry->s_retry
)
);
DEFINE_EVENT(/* event */
hfi1_sender_info_template, hfi1_sender_make_rc_req,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_sender_info_template, hfi1_sender_reset_psn,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_sender_info_template, hfi1_sender_restart_rc,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_sender_info_template, hfi1_sender_do_rc_ack,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_sender_info_template, hfi1_sender_rcv_tid_read_resp,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DECLARE_EVENT_CLASS(/* tid_read_sender */
hfi1_tid_read_sender_template,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(char, newreq)
__field(u32, tid_r_reqs)
__field(u32, tid_r_comp)
__field(u32, pending_tid_r_segs)
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
__field(u32, hw_flow_index)
__field(u32, generation)
__field(u32, fpsn)
__field(u32, flow_flags)
),
TP_fast_assign(/* assign */
struct hfi1_qp_priv *priv = qp->priv;
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->newreq = newreq;
__entry->tid_r_reqs = priv->tid_r_reqs;
__entry->tid_r_comp = priv->tid_r_comp;
__entry->pending_tid_r_segs = priv->pending_tid_r_segs;
__entry->s_flags = qp->s_flags;
__entry->ps_flags = priv->s_flags;
__entry->iow_flags = priv->s_iowait.flags;
__entry->hw_flow_index = priv->flow_state.index;
__entry->generation = priv->flow_state.generation;
__entry->fpsn = priv->flow_state.psn;
__entry->flow_flags = priv->flow_state.flags;
),
TP_printk(/* print */
TID_READ_SENDER_PRN,
__get_str(dev),
__entry->qpn,
__entry->newreq,
__entry->tid_r_reqs,
__entry->tid_r_comp,
__entry->pending_tid_r_segs,
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags,
__entry->hw_flow_index,
__entry->generation,
__entry->fpsn,
__entry->flow_flags
)
);
DEFINE_EVENT(/* event */
hfi1_tid_read_sender_template, hfi1_tid_read_sender_make_req,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq)
);
DECLARE_EVENT_CLASS(/* tid_rdma_request */
hfi1_tid_rdma_request_template,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(char, newreq)
__field(u8, opcode)
__field(u32, psn)
__field(u32, lpsn)
__field(u32, cur_seg)
__field(u32, comp_seg)
__field(u32, ack_seg)
__field(u32, total_segs)
__field(u16, setup_head)
__field(u16, clear_tail)
__field(u16, flow_idx)
__field(u32, state)
__field(u32, r_flow_psn)
__field(u32, s_next_psn)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->newreq = newreq;
__entry->opcode = opcode;
__entry->psn = psn;
__entry->lpsn = lpsn;
__entry->cur_seg = req->cur_seg;
__entry->comp_seg = req->comp_seg;
__entry->ack_seg = req->ack_seg;
__entry->total_segs = req->total_segs;
__entry->setup_head = req->setup_head;
__entry->clear_tail = req->clear_tail;
__entry->flow_idx = req->flow_idx;
__entry->state = req->state;
__entry->r_flow_psn = req->r_flow_psn;
__entry->s_next_psn = req->s_next_psn;
),
TP_printk(/* print */
TID_REQ_PRN,
__get_str(dev),
__entry->qpn,
__entry->newreq,
__entry->opcode,
__entry->psn,
__entry->lpsn,
__entry->cur_seg,
__entry->comp_seg,
__entry->ack_seg,
__entry->total_segs,
__entry->setup_head,
__entry->clear_tail,
__entry->flow_idx,
__entry->state,
__entry->r_flow_psn,
__entry->s_next_psn
)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_make_req_read,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_build_read_req,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_read_req,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_read_resp,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_err,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_restart_req,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_setup_tid_wqe,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DECLARE_EVENT_CLASS(/* rc_rcv_err */
hfi1_rc_rcv_err_template,
TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
TP_ARGS(qp, opcode, psn, diff),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u32, s_flags)
__field(u8, state)
__field(u8, s_tail_ack_queue)
__field(u8, r_head_ack_queue)
__field(u32, opcode)
__field(u32, psn)
__field(u32, r_psn)
__field(int, diff)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->state = qp->state;
__entry->s_tail_ack_queue = qp->s_tail_ack_queue;
__entry->r_head_ack_queue = qp->r_head_ack_queue;
__entry->opcode = opcode;
__entry->psn = psn;
__entry->r_psn = qp->r_psn;
__entry->diff = diff;
),
TP_printk(/* print */
RCV_ERR_PRN,
__get_str(dev),
__entry->qpn,
__entry->s_flags,
__entry->state,
__entry->s_tail_ack_queue,
__entry->r_head_ack_queue,
__entry->opcode,
__entry->psn,
__entry->r_psn,
__entry->diff
)
);
DEFINE_EVENT(/* event */
hfi1_rc_rcv_err_template, hfi1_tid_rdma_rcv_err,
TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
TP_ARGS(qp, opcode, psn, diff)
);
DECLARE_EVENT_CLASS(/* sge */
hfi1_sge_template,
TP_PROTO(struct rvt_qp *qp, int index, struct rvt_sge *sge),
TP_ARGS(qp, index, sge),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(int, index)
__field(u64, vaddr)
__field(u32, sge_length)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->index = index;
__entry->vaddr = (u64)sge->vaddr;
__entry->sge_length = sge->sge_length;
),
TP_printk(/* print */
"[%s] qpn 0x%x sge %d: vaddr 0x%llx sge_length %u",
__get_str(dev),
__entry->qpn,
__entry->index,
__entry->vaddr,
__entry->sge_length
)
);
DEFINE_EVENT(/* event */
hfi1_sge_template, hfi1_sge_check_align,
TP_PROTO(struct rvt_qp *qp, int index, struct rvt_sge *sge),
TP_ARGS(qp, index, sge)
);
#endif /* __HFI1_TRACE_TID_H */
#undef TRACE_INCLUDE_PATH

View File

@ -114,19 +114,27 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
__field(u32, qpn)
__field(u32, flags)
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
),
TP_fast_assign(
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->flags = flags;
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->ps_flags =
((struct hfi1_qp_priv *)qp->priv)->s_flags;
__entry->iow_flags =
((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
),
TP_printk(
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx",
__get_str(dev),
__entry->qpn,
__entry->flags,
__entry->s_flags
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags
)
);

View File

@ -48,7 +48,6 @@
*/
#include "hfi.h"
#include "exp_rcv.h"
struct tid_pageset {

View File

@ -165,6 +165,7 @@ const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
@ -200,6 +201,8 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@ -243,6 +246,11 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
/* TID RDMA has separate handlers for different opcodes.*/
[IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
[IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
@ -308,7 +316,7 @@ static inline opcode_handler qp_ok(struct hfi1_packet *packet)
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) {
/*
* In order to drop non-IB traffic we
* set PbcInsertHrc to NONE (0x2).
@ -319,8 +327,9 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* packet will not be delivered to the
* correct context.
*/
pbc &= ~PBC_INSERT_HCRC_SMASK;
pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
else
} else {
/*
* In order to drop regular verbs
* traffic we set the PbcTestEbp
@ -330,10 +339,129 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* triggered and will be dropped.
*/
pbc |= PBC_TEST_EBP;
}
#endif
return pbc;
}
static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet)
{
if (packet->qp->ibqp.qp_type != IB_QPT_RC ||
!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
return NULL;
if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA)
return opcode_handler_tbl[opcode];
return NULL;
}
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler opcode_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
if (unlikely(tlen < 15 * sizeof(u32)))
goto drop;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh != HFI1_LRH_BTH)
goto drop;
packet->ohdr = &hdr->u.oth;
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* verbs_qp can be picked up from any tid_rdma header struct */
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) &
RVT_QPN_MASK;
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
goto drop_rcu;
spin_lock_irqsave(&packet->qp->r_lock, flags);
opcode_handler = tid_qp_ok(opcode, packet);
if (likely(opcode_handler))
opcode_handler(packet);
else
goto drop_unlock;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
return;
drop_unlock:
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
rcu_read_unlock();
drop:
ibp->rvp.n_pkt_drops++;
}
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler opcode_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
if (unlikely(tlen < 15 * sizeof(u32)))
goto drop;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh != HFI1_LRH_BTH)
goto drop;
packet->ohdr = &hdr->u.oth;
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* verbs_qp can be picked up from any tid_rdma header struct */
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) &
RVT_QPN_MASK;
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
goto drop_rcu;
spin_lock_irqsave(&packet->qp->r_lock, flags);
opcode_handler = tid_qp_ok(opcode, packet);
if (likely(opcode_handler))
opcode_handler(packet);
else
goto drop_unlock;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
return;
drop_unlock:
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
rcu_read_unlock();
drop:
ibp->rvp.n_pkt_drops++;
}
static int hfi1_do_pkey_check(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
@ -504,11 +632,28 @@ static void verbs_sdma_complete(
hfi1_put_txreq(tx);
}
void hfi1_wait_kmem(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_qp *ibqp = &qp->ibqp;
struct ib_device *ibdev = ibqp->device;
struct hfi1_ibdev *dev = to_idev(ibdev);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
}
static int wait_kmem(struct hfi1_ibdev *dev,
struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
int ret = 0;
@ -517,15 +662,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
hfi1_wait_kmem(qp);
write_sequnlock(&dev->iowait_lock);
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
@ -674,6 +811,15 @@ static int build_verbs_tx_desc(
return ret;
}
static u64 update_hcrc(u8 opcode, u64 pbc)
{
if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) {
pbc &= ~PBC_INSERT_HCRC_SMASK;
pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT;
}
return pbc;
}
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
@ -719,6 +865,9 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
qp->srate_mbps,
vl,
plen);
/* Update HCRC based on packet opcode */
pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
@ -867,6 +1016,9 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
/* Update HCRC based on packet opcode */
pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);

View File

@ -159,17 +159,38 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
struct hfi1_ctxtdata *rcd; /* QP's receive context */
struct page **pages; /* for TID page scan */
u32 tid_enqueue; /* saved when tid waited */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
struct list_head tid_wait; /* for queueing tid space */
struct hfi1_opfn_data opfn;
struct tid_flow_state flow_state;
struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner;
u8 hdr_type; /* 9B or 16B */
unsigned long tid_timer_timeout_jiffies;
/* variables for the TID RDMA SE state machine */
u32 s_flags;
/* For TID RDMA READ */
u32 tid_r_reqs; /* Num of tid reads requested */
u32 tid_r_comp; /* Num of tid reads completed */
u32 pending_tid_r_segs; /* Num of pending tid read segments */
u16 pkts_ps; /* packets per segment */
u8 timeout_shift; /* account for number of packets per segment */
};
struct hfi1_swqe_priv {
struct tid_rdma_request tid_req;
struct rvt_sge_state ss; /* Used for TID RDMA READ Request */
};
struct hfi1_ack_priv {
struct tid_rdma_request tid_req;
};
/*
* This structure is used to hold commonly lookedup and computed values during
* the send engine progress.
@ -231,6 +252,7 @@ struct hfi1_ibdev {
struct kmem_cache *verbs_txreq_cache;
u64 n_txwait;
u64 n_kmem_wait;
u64 n_tidwait;
/* protect iowait lists */
seqlock_t iowait_lock ____cacheline_aligned_in_smp;
@ -318,6 +340,31 @@ static inline u32 delta_psn(u32 a, u32 b)
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
static inline struct tid_rdma_request *wqe_to_tid_req(struct rvt_swqe *wqe)
{
return &((struct hfi1_swqe_priv *)wqe->priv)->tid_req;
}
static inline struct tid_rdma_request *ack_to_tid_req(struct rvt_ack_entry *e)
{
return &((struct hfi1_ack_priv *)e->priv)->tid_req;
}
/*
* Look through all the active flows for a TID RDMA request and find
* the one (if it exists) that contains the specified PSN.
*/
static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
{
return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
(psn & HFI1_KDETH_BTH_SEQ_MASK));
}
static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
{
return __full_flow_psn(&flow->flow_state, psn);
}
struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);
@ -383,6 +430,10 @@ int hfi1_register_ib_device(struct hfi1_devdata *);
void hfi1_unregister_ib_device(struct hfi1_devdata *);
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet);
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet);
void hfi1_ib_rcv(struct hfi1_packet *packet);
void hfi1_16B_rcv(struct hfi1_packet *packet);
@ -400,6 +451,16 @@ static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
}
void hfi1_wait_kmem(struct rvt_qp *qp);
static inline void hfi1_trdma_send_complete(struct rvt_qp *qp,
struct rvt_swqe *wqe,
enum ib_wc_status status)
{
trdma_clean_swqe(qp, wqe);
rvt_send_complete(qp, wqe, status);
}
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
extern const u8 hdr_len_by_opcode[];

View File

@ -45,12 +45,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 len;
len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
ss->sge = wqe->sg_list[0];
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
rvt_skip_sge(ss, len, false);
return wqe->length - len;
return rvt_restart_sge(ss, wqe, len);
}
/**

View File

@ -1642,11 +1642,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
kref_put(&qp->ip->ref, rvt_release_mmap_info);
else
vfree(qp->r_rq.wq);
vfree(qp->s_wq);
rdi->driver_f.qp_priv_free(rdi, qp);
kfree(qp->s_ack_queue);
rdma_destroy_ah_attr(&qp->remote_ah_attr);
rdma_destroy_ah_attr(&qp->alt_ah_attr);
vfree(qp->s_wq);
kfree(qp);
return 0;
}
@ -2393,11 +2393,12 @@ static inline unsigned long rvt_aeth_to_usec(u32 aeth)
}
/*
* rvt_add_retry_timer - add/start a retry timer
* rvt_add_retry_timer_ext - add/start a retry timer
* @qp - the QP
* @shift - timeout shift to wait for multiple packets
* add a retry timer on the QP
*/
void rvt_add_retry_timer(struct rvt_qp *qp)
void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
{
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@ -2405,11 +2406,11 @@ void rvt_add_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
qp->s_timer.expires = jiffies + qp->timeout_jiffies +
rdi->busy_jiffies;
qp->s_timer.expires = jiffies + rdi->busy_jiffies +
(qp->timeout_jiffies << shift);
add_timer(&qp->s_timer);
}
EXPORT_SYMBOL(rvt_add_retry_timer);
EXPORT_SYMBOL(rvt_add_retry_timer_ext);
/**
* rvt_add_rnr_timer - add/start an rnr timer

View File

@ -187,3 +187,16 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
}
}
EXPORT_SYMBOL(rvt_get_credit);
/* rvt_restart_sge - rewind the sge state for a wqe */
u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len)
{
ss->sge = wqe->sg_list[0];
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
rvt_skip_sge(ss, len, false);
return wqe->length - len;
}
EXPORT_SYMBOL(rvt_restart_sge);

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2016 Intel Corporation.
* Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -100,6 +100,8 @@ struct ib_atomic_eth {
__be64 compare_data; /* potentially unaligned */
} __packed;
#include <rdma/tid_rdma_defs.h>
union ib_ehdrs {
struct {
__be32 deth[2];
@ -117,6 +119,11 @@ union ib_ehdrs {
__be32 aeth;
__be32 ieth;
struct ib_atomic_eth atomic_eth;
/* TID RDMA headers */
union {
struct tid_rdma_read_req r_req;
struct tid_rdma_read_resp r_rsp;
} tid_rdma;
} __packed;
struct ib_other_headers {

View File

@ -574,9 +574,10 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
/**
* rvt_mod_retry_timer - mod a retry timer
* @qp - the QP
* @shift - timeout shift to wait for multiple packets
* Modify a potentially already running retry timer
*/
static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
{
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@ -584,8 +585,13 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
rdi->busy_jiffies);
mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
(qp->timeout_jiffies << shift));
}
static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
{
return rvt_mod_retry_timer_ext(qp, 0);
}
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);

View File

@ -174,6 +174,7 @@ struct rvt_swqe {
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
u32 length; /* total length of data in sg_list */
void *priv; /* driver dependent field */
struct rvt_sge sg_list[0];
};
@ -235,6 +236,7 @@ struct rvt_ack_entry {
u32 lpsn;
u8 opcode;
u8 sent;
void *priv;
};
#define RC_QP_SCALING_INTERVAL 5
@ -628,6 +630,16 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp);
*/
void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
/**
* rvt_restart_sge - rewind the sge state for a wqe
* @ss: the sge state pointer
* @wqe: the wqe to rewind
* @len: the data length from the start of the wqe in bytes
*
* Returns the remaining data length.
*/
u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
/**
* @qp - the qp pair
* @len - the length
@ -676,7 +688,11 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth);
void rvt_del_timers_sync(struct rvt_qp *qp);
void rvt_stop_rc_timers(struct rvt_qp *qp);
void rvt_add_retry_timer(struct rvt_qp *qp);
void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift);
static inline void rvt_add_retry_timer(struct rvt_qp *qp)
{
rvt_add_retry_timer_ext(qp, 0);
}
void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
void *data, u32 length,

View File

@ -0,0 +1,52 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef TID_RDMA_DEFS_H
#define TID_RDMA_DEFS_H
#include <rdma/ib_pack.h>
struct tid_rdma_read_req {
__le32 kdeth0;
__le32 kdeth1;
struct ib_reth reth;
__be32 tid_flow_psn;
__be32 tid_flow_qp;
__be32 verbs_qp;
};
struct tid_rdma_read_resp {
__le32 kdeth0;
__le32 kdeth1;
__be32 aeth;
__be32 reserved[4];
__be32 verbs_psn;
__be32 verbs_qp;
};
/*
* TID RDMA Opcodes
*/
#define IB_OPCODE_TID_RDMA 0xe0
enum {
IB_OPCODE_READ_REQ = 0x4,
IB_OPCODE_READ_RESP = 0x5,
IB_OPCODE(TID_RDMA, READ_REQ),
IB_OPCODE(TID_RDMA, READ_RESP),
};
#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
/*
* Define TID RDMA specific WR opcodes. The ib_wr_opcode
* enum already provides some reserved values for use by
* low level drivers. Two of those are used but renamed
* to be more descriptive.
*/
#define IB_WR_TID_RDMA_READ IB_WR_RESERVED2
#endif /* TID_RDMA_DEFS_H */