diff -Nru libmlx4-1.0.6/debian/changelog libmlx4-1.0.6/debian/changelog
--- libmlx4-1.0.6/debian/changelog	2014-05-06 14:36:02.000000000 -0300
+++ libmlx4-1.0.6/debian/changelog	2015-05-07 16:28:54.000000000 -0300
@@ -1,3 +1,9 @@
+libmlx4 (1.0.6-1ubuntu1) vivid; urgency=medium
+
+  * Add checksum offload support capability (LP: #1409904)
+
+ -- Rafael David Tinoco (Inaddy)  Wed, 29 Apr 2015 21:14:09 -0300
+
 libmlx4 (1.0.6-1) unstable; urgency=low
 
   * New upstream release.
diff -Nru libmlx4-1.0.6/debian/control libmlx4-1.0.6/debian/control
--- libmlx4-1.0.6/debian/control	2014-05-06 14:36:02.000000000 -0300
+++ libmlx4-1.0.6/debian/control	2015-05-07 16:29:26.000000000 -0300
@@ -1,7 +1,8 @@
 Source: libmlx4
 Priority: extra
-Maintainer: Roland Dreier
-Build-Depends: debhelper (>= 7.0.50~), dpkg-dev (>= 1.13.19), libibverbs-dev (>= 1.1.8), dh-autoreconf
+Maintainer: Ubuntu Developers
+XSBC-Original-Maintainer: Roland Dreier
+Build-Depends: debhelper (>= 7.0.50~), dpkg-dev (>= 1.13.19), libibverbs-dev (>= 1.1.8-1ubuntu2), dh-autoreconf
 Standards-Version: 3.9.5
 Section: libs
 Homepage: http://www.openfabrics.org/
@@ -9,7 +10,7 @@
 Package: libmlx4-1
 Section: libs
 Architecture: any
-Depends: ${shlibs:Depends}, ${misc:Depends}, libibverbs1 (>= 1.1.8)
+Depends: ${shlibs:Depends}, ${misc:Depends}, libibverbs1 (>= 1.1.8-1ubuntu2)
 Description: Userspace driver for Mellanox ConnectX InfiniBand HCAs
  libmlx4 is a device-specific driver for Mellanox ConnectX InfiniBand
  host channel adapters (HCAs) for the libibverbs library.  This allows
diff -Nru libmlx4-1.0.6/debian/patches/Add-checksum-offload-support-capability.patch libmlx4-1.0.6/debian/patches/Add-checksum-offload-support-capability.patch
--- libmlx4-1.0.6/debian/patches/Add-checksum-offload-support-capability.patch	1969-12-31 21:00:00.000000000 -0300
+++ libmlx4-1.0.6/debian/patches/Add-checksum-offload-support-capability.patch	2015-05-07 16:28:30.000000000 -0300
@@ -0,0 +1,767 @@
+Description: Add checksum offload support capability
+
+Set the IP and TCP/UDP checksum offload flags in the send WQE when the caller requests IBV_SEND_IP_CSUM (UD and raw packet QPs).
+
+Author: Matan Barak
+Signed-off-by: Matan Barak
+Signed-off-by: Or Gerlitz
+Signed-off-by: Roland Dreier
+
+Origin: upstream, commit: 1b6875d4c2cba3b751ad0b4286cf52b6a22bff97
+Bug-Ubuntu: https://launchpad.net/bugs/1409904
+Last-Update: 2015-02-09
+
+--- libmlx4-1.0.6.orig/src/qp.c
++++ libmlx4-1.0.6/src/qp.c
+@@ -286,6 +286,10 @@ int mlx4_post_send(struct ibv_qp *ibqp,
+ 		break;
+ 
+ 	case IBV_QPT_UD:
++		ctrl->srcrb_flags |=
++			wr->send_flags & IBV_SEND_IP_CSUM ?
++			htonl(MLX4_WQE_CTRL_IP_CSUM |
++			      MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0;
+ 		set_datagram_seg(wqe, wr);
+ 		wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ 		size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+@@ -294,7 +298,12 @@ int mlx4_post_send(struct ibv_qp *ibqp,
+ 	case IBV_QPT_RAW_PACKET:
+ 		/* For raw eth, the MLX4_WQE_CTRL_SOLICIT flag is used
+ 		 * to indicate that no icrc should be calculated */
+-		ctrl->srcrb_flags |= htonl(MLX4_WQE_CTRL_SOLICIT);
++		ctrl->srcrb_flags |=
++			wr->send_flags & IBV_SEND_IP_CSUM ?
++			htonl(MLX4_WQE_CTRL_IP_CSUM |
++			      MLX4_WQE_CTRL_TCP_UDP_CSUM |
++			      MLX4_WQE_CTRL_SOLICIT) :
++			htonl(MLX4_WQE_CTRL_SOLICIT);
+ 		break;
+ 
+ 	default:
+--- /dev/null
++++ libmlx4-1.0.6/src/qp.c.orig
+@@ -0,0 +1,713 @@
++/*
++ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
++ * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
++ * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses.  You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ *     Redistribution and use in source and binary forms, with or
++ *     without modification, are permitted provided that the following
++ *     conditions are met:
++ *
++ *      - Redistributions of source code must retain the above
++ *        copyright notice, this list of conditions and the following
++ *        disclaimer.
++ *
++ *      - Redistributions in binary form must reproduce the above
++ *        copyright notice, this list of conditions and the following
++ *        disclaimer in the documentation and/or other materials
++ *        provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#if HAVE_CONFIG_H
++#  include <config.h>
++#endif /* HAVE_CONFIG_H */
++
++#include <stdlib.h>
++#include <netinet/in.h>
++#include <pthread.h>
++#include <string.h>
++#include <errno.h>
++
++#include "mlx4.h"
++#include "doorbell.h"
++#include "wqe.h"
++
++static const uint32_t mlx4_ib_opcode[] = {
++	[IBV_WR_SEND]			= MLX4_OPCODE_SEND,
++	[IBV_WR_SEND_WITH_IMM]		= MLX4_OPCODE_SEND_IMM,
++	[IBV_WR_RDMA_WRITE]		= MLX4_OPCODE_RDMA_WRITE,
++	[IBV_WR_RDMA_WRITE_WITH_IMM]	= MLX4_OPCODE_RDMA_WRITE_IMM,
++	[IBV_WR_RDMA_READ]		= MLX4_OPCODE_RDMA_READ,
++	[IBV_WR_ATOMIC_CMP_AND_SWP]	= MLX4_OPCODE_ATOMIC_CS,
++	[IBV_WR_ATOMIC_FETCH_AND_ADD]	= MLX4_OPCODE_ATOMIC_FA,
++};
++
++static void *get_recv_wqe(struct mlx4_qp *qp, int n)
++{
++	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
++}
++
++static void *get_send_wqe(struct mlx4_qp *qp, int n)
++{
++	return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
++}
++
++/*
++ * Stamp a SQ WQE so that it is invalid if prefetched by marking the
++ * first four bytes of every 64 byte chunk with 0xffffffff, except for
++ * the very first chunk of the WQE.
++ */
++static void stamp_send_wqe(struct mlx4_qp *qp, int n)
++{
++	uint32_t *wqe = get_send_wqe(qp, n);
++	int i;
++	int ds = (((struct mlx4_wqe_ctrl_seg *)wqe)->fence_size & 0x3f) << 2;
++
++	for (i = 16; i < ds; i += 16)
++		wqe[i] = 0xffffffff;
++}
++
++void mlx4_init_qp_indices(struct mlx4_qp *qp)
++{
++	qp->sq.head = 0;
++	qp->sq.tail = 0;
++	qp->rq.head = 0;
++	qp->rq.tail = 0;
++}
++
++void mlx4_qp_init_sq_ownership(struct mlx4_qp *qp)
++{
++	struct mlx4_wqe_ctrl_seg *ctrl;
++	int i;
++
++	for (i = 0; i < qp->sq.wqe_cnt; ++i) {
++		ctrl = get_send_wqe(qp, i);
++		ctrl->owner_opcode = htonl(1 << 31);
++		ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
++
++		stamp_send_wqe(qp, i);
++	}
++}
++
++static int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
++{
++	unsigned cur;
++
++	cur = wq->head - wq->tail;
++	if (cur + nreq < wq->max_post)
++		return 0;
++
++	pthread_spin_lock(&cq->lock);
++	cur = wq->head - wq->tail;
++	pthread_spin_unlock(&cq->lock);
++
++	return cur + nreq >= wq->max_post;
++}
++
++static inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
++				 uint64_t remote_addr, uint32_t rkey)
++{
++	rseg->raddr = htonll(remote_addr);
++	rseg->rkey = htonl(rkey);
++	rseg->reserved = 0;
++}
++
++static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ibv_send_wr *wr)
++{
++	if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
++		aseg->swap_add = htonll(wr->wr.atomic.swap);
++		aseg->compare = htonll(wr->wr.atomic.compare_add);
++	} else {
++		aseg->swap_add = htonll(wr->wr.atomic.compare_add);
++		aseg->compare = 0;
++	}
++
++}
++
++static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
++			     struct ibv_send_wr *wr)
++{
++	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
++	dseg->dqpn = htonl(wr->wr.ud.remote_qpn);
++	dseg->qkey = htonl(wr->wr.ud.remote_qkey);
++	dseg->vlan = htons(to_mah(wr->wr.ud.ah)->vlan);
++	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6);
++}
++
++static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
++{
++	dseg->byte_count = htonl(sg->length);
++	dseg->lkey = htonl(sg->lkey);
++	dseg->addr = htonll(sg->addr);
++}
++
++static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
++{
++	dseg->lkey = htonl(sg->lkey);
++	dseg->addr = htonll(sg->addr);
++
++	/*
++	 * Need a barrier here before writing the byte_count field to
++	 * make sure that all the data is visible before the
++	 * byte_count field is set.  Otherwise, if the segment begins
++	 * a new cacheline, the HCA prefetcher could grab the 64-byte
++	 * chunk and get a valid (!= * 0xffffffff) byte count but
++	 * stale data, and end up sending the wrong data.
++	 */
++	wmb();
++
++	dseg->byte_count = htonl(sg->length);
++}
++
++/*
++ * Avoid using memcpy() to copy to BlueFlame page, since memcpy()
++ * implementations may use move-string-buffer assembler instructions,
++ * which do not guarantee order of copying.
++ */
++static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
++{
++	while (bytecnt > 0) {
++		*dst++ = *src++;
++		*dst++ = *src++;
++		bytecnt -= 2 * sizeof (long);
++	}
++}
++
++int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
++		   struct ibv_send_wr **bad_wr)
++{
++	struct mlx4_context *ctx;
++	struct mlx4_qp *qp = to_mqp(ibqp);
++	void *wqe;
++	struct mlx4_wqe_ctrl_seg *ctrl;
++	int ind;
++	int nreq;
++	int inl = 0;
++	int ret = 0;
++	int size;
++	int i;
++
++	pthread_spin_lock(&qp->sq.lock);
++
++	/* XXX check that state is OK to post send */
++
++	ind = qp->sq.head;
++
++	for (nreq = 0; wr; ++nreq, wr = wr->next) {
++		if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
++			ret = ENOMEM;
++			*bad_wr = wr;
++			goto out;
++		}
++
++		if (wr->num_sge > qp->sq.max_gs) {
++			ret = ENOMEM;
++			*bad_wr = wr;
++			goto out;
++		}
++
++		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
++			ret = EINVAL;
++			*bad_wr = wr;
++			goto out;
++		}
++
++		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
++		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
++
++		ctrl->srcrb_flags =
++			(wr->send_flags & IBV_SEND_SIGNALED ?
++			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
++			(wr->send_flags & IBV_SEND_SOLICITED ?
++			 htonl(MLX4_WQE_CTRL_SOLICIT) : 0) |
++			qp->sq_signal_bits;
++
++		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
++		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
++			ctrl->imm = wr->imm_data;
++		else
++			ctrl->imm = 0;
++
++		wqe += sizeof *ctrl;
++		size = sizeof *ctrl / 16;
++
++		switch (ibqp->qp_type) {
++		case IBV_QPT_XRC_SEND:
++			ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
++			/* fall through */
++		case IBV_QPT_RC:
++		case IBV_QPT_UC:
++			switch (wr->opcode) {
++			case IBV_WR_ATOMIC_CMP_AND_SWP:
++			case IBV_WR_ATOMIC_FETCH_AND_ADD:
++				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
++					      wr->wr.atomic.rkey);
++				wqe += sizeof (struct mlx4_wqe_raddr_seg);
++
++				set_atomic_seg(wqe, wr);
++				wqe += sizeof (struct mlx4_wqe_atomic_seg);
++				size += (sizeof (struct mlx4_wqe_raddr_seg) +
++					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
++
++				break;
++
++			case IBV_WR_RDMA_READ:
++				inl = 1;
++				/* fall through */
++			case IBV_WR_RDMA_WRITE:
++			case IBV_WR_RDMA_WRITE_WITH_IMM:
++				if (!wr->num_sge)
++					inl = 1;
++				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
++					      wr->wr.rdma.rkey);
++				wqe += sizeof (struct mlx4_wqe_raddr_seg);
++				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
++
++				break;
++
++			default:
++				/* No extra segments required for sends */
++				break;
++			}
++			break;
++
++		case IBV_QPT_UD:
++			set_datagram_seg(wqe, wr);
++			wqe += sizeof (struct mlx4_wqe_datagram_seg);
++			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
++			break;
++
++		case IBV_QPT_RAW_PACKET:
++			/* For raw eth, the MLX4_WQE_CTRL_SOLICIT flag is used
++			 * to indicate that no icrc should be calculated */
++			ctrl->srcrb_flags |= htonl(MLX4_WQE_CTRL_SOLICIT);
++			break;
++
++		default:
++			break;
++		}
++
++		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
++			struct mlx4_wqe_inline_seg *seg;
++			void *addr;
++			int len, seg_len;
++			int num_seg;
++			int off, to_copy;
++
++			inl = 0;
++
++			seg = wqe;
++			wqe += sizeof *seg;
++			off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
++			num_seg = 0;
++			seg_len = 0;
++
++			for (i = 0; i < wr->num_sge; ++i) {
++				addr = (void *) (uintptr_t) wr->sg_list[i].addr;
++				len = wr->sg_list[i].length;
++				inl += len;
++
++				if (inl > qp->max_inline_data) {
++					inl = 0;
++					ret = ENOMEM;
++					*bad_wr = wr;
++					goto out;
++				}
++
++				while (len >= MLX4_INLINE_ALIGN - off) {
++					to_copy = MLX4_INLINE_ALIGN - off;
++					memcpy(wqe, addr, to_copy);
++					len -= to_copy;
++					wqe += to_copy;
++					addr += to_copy;
++					seg_len += to_copy;
++					wmb(); /* see comment below */
++					seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
++					seg_len = 0;
++					seg = wqe;
++					wqe += sizeof *seg;
++					off = sizeof *seg;
++					++num_seg;
++				}
++
++				memcpy(wqe, addr, len);
++				wqe += len;
++				seg_len += len;
++				off += len;
++			}
++
++			if (seg_len) {
++				++num_seg;
++				/*
++				 * Need a barrier here to make sure
++				 * all the data is visible before the
++				 * byte_count field is set.  Otherwise
++				 * the HCA prefetcher could grab the
++				 * 64-byte chunk with this inline
++				 * segment and get a valid (!=
++				 * 0xffffffff) byte count but stale
++				 * data, and end up sending the wrong
++				 * data.
++				 */
++				wmb();
++				seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
++			}
++
++			size += (inl + num_seg * sizeof * seg + 15) / 16;
++		} else {
++			struct mlx4_wqe_data_seg *seg = wqe;
++
++			for (i = wr->num_sge - 1; i >= 0 ; --i)
++				set_data_seg(seg + i, wr->sg_list + i);
++
++			size += wr->num_sge * (sizeof *seg / 16);
++		}
++
++		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
++				    MLX4_WQE_CTRL_FENCE : 0) | size;
++
++		/*
++		 * Make sure descriptor is fully written before
++		 * setting ownership bit (because HW can start
++		 * executing as soon as we do).
++		 */
++		wmb();
++
++		ctrl->owner_opcode = htonl(mlx4_ib_opcode[wr->opcode]) |
++			(ind & qp->sq.wqe_cnt ? htonl(1 << 31) : 0);
++
++		/*
++		 * We can improve latency by not stamping the last
++		 * send queue WQE until after ringing the doorbell, so
++		 * only stamp here if there are still more WQEs to post.
++		 */
++		if (wr->next)
++			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
++				       (qp->sq.wqe_cnt - 1));
++
++		++ind;
++	}
++
++out:
++	ctx = to_mctx(ibqp->context);
++
++	if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) {
++		ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
++		*(uint32_t *) ctrl->reserved |= qp->doorbell_qpn;
++		/*
++		 * Make sure that descriptor is written to memory
++		 * before writing to BlueFlame page.
++		 */
++		wmb();
++
++		++qp->sq.head;
++
++		pthread_spin_lock(&ctx->bf_lock);
++
++		mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
++			     align(size * 16, 64));
++		wc_wmb();
++
++		ctx->bf_offset ^= ctx->bf_buf_size;
++
++		pthread_spin_unlock(&ctx->bf_lock);
++	} else if (nreq) {
++		qp->sq.head += nreq;
++
++		/*
++		 * Make sure that descriptors are written before
++		 * doorbell record.
++		 */
++		wmb();
++
++		*(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn;
++	}
++
++	if (nreq)
++		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
++			       (qp->sq.wqe_cnt - 1));
++
++	pthread_spin_unlock(&qp->sq.lock);
++
++	return ret;
++}
++
++int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
++		   struct ibv_recv_wr **bad_wr)
++{
++	struct mlx4_qp *qp = to_mqp(ibqp);
++	struct mlx4_wqe_data_seg *scat;
++	int ret = 0;
++	int nreq;
++	int ind;
++	int i;
++
++	pthread_spin_lock(&qp->rq.lock);
++
++	/* XXX check that state is OK to post receive */
++
++	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
++
++	for (nreq = 0; wr; ++nreq, wr = wr->next) {
++		if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
++			ret = ENOMEM;
++			*bad_wr = wr;
++			goto out;
++		}
++
++		if (wr->num_sge > qp->rq.max_gs) {
++			ret = ENOMEM;
++			*bad_wr = wr;
++			goto out;
++		}
++
++		scat = get_recv_wqe(qp, ind);
++
++		for (i = 0; i < wr->num_sge; ++i)
++			__set_data_seg(scat + i, wr->sg_list + i);
++
++		if (i < qp->rq.max_gs) {
++			scat[i].byte_count = 0;
++			scat[i].lkey = htonl(MLX4_INVALID_LKEY);
++			scat[i].addr = 0;
++		}
++
++		qp->rq.wrid[ind] = wr->wr_id;
++
++		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
++	}
++
++out:
++	if (nreq) {
++		qp->rq.head += nreq;
++
++		/*
++		 * Make sure that descriptors are written before
++		 * doorbell record.
++		 */
++		wmb();
++
++		*qp->db = htonl(qp->rq.head & 0xffff);
++	}
++
++	pthread_spin_unlock(&qp->rq.lock);
++
++	return ret;
++}
++
++static int num_inline_segs(int data, enum ibv_qp_type type)
++{
++	/*
++	 * Inline data segments are not allowed to cross 64 byte
++	 * boundaries.  For UD QPs, the data segments always start
++	 * aligned to 64 bytes (16 byte control segment + 48 byte
++	 * datagram segment); for other QPs, there will be a 16 byte
++	 * control segment and possibly a 16 byte remote address
++	 * segment, so in the worst case there will be only 32 bytes
++	 * available for the first data segment.
++	 */
++	if (type == IBV_QPT_UD)
++		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
++			 sizeof (struct mlx4_wqe_datagram_seg)) %
++			MLX4_INLINE_ALIGN;
++	else
++		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
++			 sizeof (struct mlx4_wqe_raddr_seg)) %
++			MLX4_INLINE_ALIGN;
++
++	return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) /
++		(MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg));
++}
++
++void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
++			   struct mlx4_qp *qp)
++{
++	int size;
++	int max_sq_sge;
++
++	max_sq_sge = align(cap->max_inline_data +
++			   num_inline_segs(cap->max_inline_data, type) *
++			   sizeof (struct mlx4_wqe_inline_seg),
++			   sizeof (struct mlx4_wqe_data_seg)) /
++		sizeof (struct mlx4_wqe_data_seg);
++	if (max_sq_sge < cap->max_send_sge)
++		max_sq_sge = cap->max_send_sge;
++
++	size = max_sq_sge * sizeof (struct mlx4_wqe_data_seg);
++	switch (type) {
++	case IBV_QPT_UD:
++		size += sizeof (struct mlx4_wqe_datagram_seg);
++		break;
++
++	case IBV_QPT_UC:
++		size += sizeof (struct mlx4_wqe_raddr_seg);
++		break;
++
++	case IBV_QPT_XRC_SEND:
++	case IBV_QPT_RC:
++		size += sizeof (struct mlx4_wqe_raddr_seg);
++		/*
++		 * An atomic op will require an atomic segment, a
++		 * remote address segment and one scatter entry.
++		 */
++		if (size < (sizeof (struct mlx4_wqe_atomic_seg) +
++			    sizeof (struct mlx4_wqe_raddr_seg) +
++			    sizeof (struct mlx4_wqe_data_seg)))
++			size = (sizeof (struct mlx4_wqe_atomic_seg) +
++				sizeof (struct mlx4_wqe_raddr_seg) +
++				sizeof (struct mlx4_wqe_data_seg));
++		break;
++
++	default:
++		break;
++	}
++
++	/* Make sure that we have enough space for a bind request */
++	if (size < sizeof (struct mlx4_wqe_bind_seg))
++		size = sizeof (struct mlx4_wqe_bind_seg);
++
++	size += sizeof (struct mlx4_wqe_ctrl_seg);
++
++	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
++	     qp->sq.wqe_shift++)
++		; /* nothing */
++}
++
++int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
++		      enum ibv_qp_type type, struct mlx4_qp *qp)
++{
++	qp->rq.max_gs = cap->max_recv_sge;
++
++	if (qp->sq.wqe_cnt) {
++		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
++		if (!qp->sq.wrid)
++			return -1;
++	}
++
++	if (qp->rq.wqe_cnt) {
++		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
++		if (!qp->rq.wrid) {
++			free(qp->sq.wrid);
++			return -1;
++		}
++	}
++
++	for (qp->rq.wqe_shift = 4;
++	     1 << qp->rq.wqe_shift < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
++	     qp->rq.wqe_shift++)
++		; /* nothing */
++
++	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
++		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
++	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
++		qp->rq.offset = 0;
++		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
++	} else {
++		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
++		qp->sq.offset = 0;
++	}
++
++	if (qp->buf_size) {
++		if (mlx4_alloc_buf(&qp->buf,
++				   align(qp->buf_size, to_mdev(context->device)->page_size),
++				   to_mdev(context->device)->page_size)) {
++			free(qp->sq.wrid);
++			free(qp->rq.wrid);
++			return -1;
++		}
++
++		memset(qp->buf.buf, 0, qp->buf_size);
++	} else {
++		qp->buf.buf = NULL;
++	}
++
++	return 0;
++}
++
++void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
++		       enum ibv_qp_type type)
++{
++	int wqe_size;
++
++	wqe_size = (1 << qp->sq.wqe_shift) - sizeof (struct mlx4_wqe_ctrl_seg);
++	switch (type) {
++	case IBV_QPT_UD:
++		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
++		break;
++
++	case IBV_QPT_XRC_SEND:
++	case IBV_QPT_UC:
++	case IBV_QPT_RC:
++		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
++		break;
++
++	default:
++		break;
++	}
++
++	qp->sq.max_gs = wqe_size / sizeof (struct mlx4_wqe_data_seg);
++	cap->max_send_sge = qp->sq.max_gs;
++	qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
++	cap->max_send_wr = qp->sq.max_post;
++
++	/*
++	 * Inline data segments can't cross a 64 byte boundary.  So
++	 * subtract off one segment header for each 64-byte chunk,
++	 * taking into account the fact that wqe_size will be 32 mod
++	 * 64 for non-UD QPs.
++	 */
++	qp->max_inline_data = wqe_size -
++		sizeof (struct mlx4_wqe_inline_seg) *
++		(align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
++	cap->max_inline_data = qp->max_inline_data;
++}
++
++struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
++{
++	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
++
++	if (ctx->qp_table[tind].refcnt)
++		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
++	else
++		return NULL;
++}
++
++int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp)
++{
++	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
++
++	if (!ctx->qp_table[tind].refcnt) {
++		ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
++						   sizeof (struct mlx4_qp *));
++		if (!ctx->qp_table[tind].table)
++			return -1;
++	}
++
++	++ctx->qp_table[tind].refcnt;
++	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
++	return 0;
++}
++
++void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn)
++{
++	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
++
++	if (!--ctx->qp_table[tind].refcnt)
++		free(ctx->qp_table[tind].table);
++	else
++		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
++}
+--- libmlx4-1.0.6.orig/src/wqe.h
++++ libmlx4-1.0.6/src/wqe.h
+@@ -41,6 +41,8 @@ enum {
+ 	MLX4_WQE_CTRL_FENCE	= 1 << 6,
+ 	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
+ 	MLX4_WQE_CTRL_SOLICIT	= 1 << 1,
++	MLX4_WQE_CTRL_IP_CSUM		= 1 << 4,
++	MLX4_WQE_CTRL_TCP_UDP_CSUM	= 1 << 5
+ };
+ 
+ enum {
diff -Nru libmlx4-1.0.6/debian/patches/series libmlx4-1.0.6/debian/patches/series
--- libmlx4-1.0.6/debian/patches/series	2014-05-06 14:36:02.000000000 -0300
+++ libmlx4-1.0.6/debian/patches/series	2015-05-07 16:28:30.000000000 -0300
@@ -1 +1,2 @@
 driver-plugin-directory.patch
+Add-checksum-offload-support-capability.patch
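
Note (editorial, not part of the debdiff): the sketch below shows how an application might consume the capability this update backports. It is a minimal example under stated assumptions: a UD QP, completion queue, registered buffer and address handle are assumed to already exist, and the helper name post_ud_send_with_csum and its parameters are illustrative only, not part of libmlx4. IBV_SEND_IP_CSUM and IBV_DEVICE_UD_IP_CSUM come from the patched libibverbs, which is why the control file bumps the dependency to 1.1.8-1ubuntu2.

#include <string.h>
#include <infiniband/verbs.h>

/* Post one UD send, asking the HCA to compute the IP and TCP/UDP checksums
 * when the device advertises the capability.  With this patch applied,
 * libmlx4 translates IBV_SEND_IP_CSUM into MLX4_WQE_CTRL_IP_CSUM |
 * MLX4_WQE_CTRL_TCP_UDP_CSUM in the WQE control segment. */
static int post_ud_send_with_csum(struct ibv_context *ctx, struct ibv_qp *qp,
				  struct ibv_ah *ah, uint32_t remote_qpn,
				  uint32_t remote_qkey, struct ibv_sge *sge)
{
	struct ibv_device_attr attr;
	struct ibv_send_wr wr, *bad_wr;

	if (ibv_query_device(ctx, &attr))
		return -1;

	memset(&wr, 0, sizeof wr);
	wr.wr_id             = 1;
	wr.sg_list           = sge;
	wr.num_sge           = 1;
	wr.opcode            = IBV_WR_SEND;
	wr.send_flags        = IBV_SEND_SIGNALED;
	wr.wr.ud.ah          = ah;
	wr.wr.ud.remote_qpn  = remote_qpn;
	wr.wr.ud.remote_qkey = remote_qkey;

	/* Only request the offload when the HCA reports it. */
	if (attr.device_cap_flags & IBV_DEVICE_UD_IP_CSUM)
		wr.send_flags |= IBV_SEND_IP_CSUM;

	return ibv_post_send(qp, &wr, &bad_wr);
}

On the receiving side, a completion with IBV_WC_IP_CSUM_OK set in wc_flags indicates that the hardware already validated the checksums, so the application can skip the software check.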