Cdiff rpcib.c
*** /webrev/webrev/usr/src/uts/common/rpc/rpcib.c- Mon Aug 14 13:12:11 2006
--- rpcib.c Thu Aug 10 14:05:27 2006
*** 22,31 ****
--- 22,45 ----
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+
+ /* Copyright (c) 2006, The Ohio State University. All rights reserved.
+ *
+ * Portions of this source code were developed by the team members of
+ * The Ohio State University's Network-Based Computing Laboratory (NBCL),
+ * headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * Acknowledgements to contributions from developers:
+ * Ranjit Noronha: noronha@cse.ohio-state.edu
+ * Lei Chai : chail@cse.ohio-state.edu
+ * Weikuan Yu : yuw@cse.ohio-state.edu
+ *
+ */
+
#pragma ident "@(#)rpcib.c 1.29 06/01/25 SMI"
/*
* The rpcib plugin. Implements the interface for RDMATF's
* interaction with IBTF.
*** 55,64 ****
--- 69,80 ----
#include <sys/isa_defs.h>
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
+ /* #define IB_FMR_SUP */
+ /* #define CLNT_POLL_CQ */
#include <sys/ib/ibtl/ibti.h>
#include <rpc/rpc.h>
#include <rpc/ib.h>
#include <sys/modctl.h>
*** 68,78 ****
--- 84,97 ----
#include <sys/sockio.h>
#include <sys/vnode.h>
#include <sys/tiuser.h>
#include <net/if.h>
#include <sys/cred.h>
+ #include <rpc/rpc_rdma.h>
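+ /*
+ * num_clients is a rough count of server-side QPs: it is incremented
+ * in rib_svc_create_chan() and decremented when the QP is torn down.
+ * is_server is set once rib_get_resource_info() runs on the server.
+ * Both are advisory and are not protected by any lock.
+ */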
+ int num_clients = 0;
+ volatile uint32_t is_server = 0;
extern char *inet_ntop(int, const void *, char *, int);
/*
*** 105,114 ****
--- 124,136 ----
CB_REV, /* rev */
nodev, /* int (*cb_aread)() */
nodev /* int (*cb_awrite)() */
};
/*
* Device options
*/
static struct dev_ops rpcib_ops = {
DEVO_REV, /* devo_rev, */
*** 138,158 ****
MODREV_1,
(void *)&rib_modldrv,
NULL
};
/*
* rib_stat: private data pointer used when registering
* with the IBTF. It is returned to the consumer
* in all callbacks.
*/
static rpcib_state_t *rib_stat = NULL;
! #define RNR_RETRIES 2
#define MAX_PORTS 2
! int preposted_rbufs = 16;
int send_threshold = 1;
/*
* State of the plugin.
* ACCEPT = accepting new connections and requests.
--- 160,200 ----
MODREV_1,
(void *)&rib_modldrv,
NULL
};
+ #ifdef SERVER_REG_CACHE
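+ /*
+ * One AVL node per buffer length: registered long-reply buffers of
+ * that length are kept on the circular queue rooted at r, and
+ * elements tracks the queue depth.
+ */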
+ typedef struct cache_struct {
+ avl_node_t avl_link;
+ rib_lrc_entry_t r;
+ uint32_t len;
+ uint32_t elements;
+ kmutex_t node_lock;
+ } cache_avl_struct_t;
+
+
+ int rib_total_buffers = 0;
+ #endif
/*
* rib_stat: private data pointer used when registering
* with the IBTF. It is returned to the consumer
* in all callbacks.
*/
static rpcib_state_t *rib_stat = NULL;
! #define RNR_RETRIES IBT_RNR_INFINITE_RETRY
#define MAX_PORTS 2
! #ifdef IB_FMR_SUP
! #define IB_FMR_DIRTY_MARK 32
! #define IB_FMR_MAX_SIZE 1048576
! /*#define IB_FMR_MAX_SIZE 32768 */
! #endif
!
! int preposted_rbufs = RDMA_BUFS_GRANT;
int send_threshold = 1;
/*
* State of the plugin.
* ACCEPT = accepting new connections and requests.
*** 167,188 ****
/*
* RPCIB RDMATF operations
*/
static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
static rdma_stat rib_disconnect(CONN *conn);
static void rib_listen(struct rdma_svc_data *rd);
static void rib_listen_stop(struct rdma_svc_data *rd);
! static rdma_stat rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
struct mrc *buf_handle);
static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
struct mrc buf_handle);
! static rdma_stat rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle);
static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle);
static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
caddr_t buf, int len, int cpu);
static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
--- 209,245 ----
/*
* RPCIB RDMATF operations
*/
+ #if defined(MEASURE_POOL_DEPTH)
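+ /*
+ * Empty stub, kept as an instrumentation hook (e.g. for a dtrace
+ * fbt probe) to observe how deep the preposted receive pool gets.
+ */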
+ static void rib_posted_rbufs(uint32_t x) { return;}
+ #endif
static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
static rdma_stat rib_disconnect(CONN *conn);
static void rib_listen(struct rdma_svc_data *rd);
static void rib_listen_stop(struct rdma_svc_data *rd);
! static rdma_stat rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
struct mrc *buf_handle);
static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
struct mrc buf_handle);
! static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
! caddr_t buf, uint_t buflen, struct mrc *buf_handle);
! static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
! struct mrc buf_handle);
! #ifdef SERVER_REG_CACHE
! static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
! struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc);
! static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
! struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
! #else
! static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle);
static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle);
+
+ #endif
static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
caddr_t buf, int len, int cpu);
static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
*** 190,199 ****
--- 247,266 ----
static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
+ #if defined (CLNT_INTERRUPT_COAL)
+ static void rib_scq_free(caddr_t);
+ static rdma_stat rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid);
+ #endif
+ #if defined(ASYNC_SERVER_DEREG)
+ static rdma_stat rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid,
+ caddr_t, caddr_t, int, caddr_t, int, int, int);
+ #endif
+ #if defined(ASYNC_CLIENT_DEREG)
+ static void insert_queue(CONN *conn, struct clist *rwc);
+ #endif
static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
*** 200,209 ****
--- 267,289 ----
static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **);
static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
static rdma_stat rib_conn_release(CONN *conn);
static rdma_stat rib_getinfo(rdma_info_t *info);
+ #ifdef DYNAMIC_CREDIT_CONTROL
+ void rib_get_resource_info(CONN *, int *, int *);
+ #endif
+
+ #ifdef SERVER_REG_CACHE
+ static rib_lrc_entry_t *rib_get_server_cache_buf(CONN *conn, uint32_t len);
+ static void rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
+ static void rib_destroy_cache(rib_hca_t *hca);
+ static void
+ rib_server_side_cache_reclaim(void *argp);
+ static int avl_compare(const void *t1, const void *t2);
+ #endif
+
static rdma_stat rib_register_ats(rib_hca_t *);
static void rib_deregister_ats();
static void rib_stop_services(rib_hca_t *);
/*
*** 213,224 ****
int get_interfaces(TIUSER *tiptr, int *num);
int find_addrs(TIUSER *tiptr, char **addrs, int num_ifs);
int get_ibd_ipaddr(rpcib_ibd_insts_t *);
rpcib_ats_t *get_ibd_entry(ib_gid_t *, ib_pkey_t, rpcib_ibd_insts_t *);
void rib_get_ibd_insts(rpcib_ibd_insts_t *);
!
/*
* RDMA operations the RPCIB module exports
*/
static rdmaops_t rib_ops = {
rib_reachable,
--- 293,319 ----
int get_interfaces(TIUSER *tiptr, int *num);
int find_addrs(TIUSER *tiptr, char **addrs, int num_ifs);
int get_ibd_ipaddr(rpcib_ibd_insts_t *);
rpcib_ats_t *get_ibd_entry(ib_gid_t *, ib_pkey_t, rpcib_ibd_insts_t *);
void rib_get_ibd_insts(rpcib_ibd_insts_t *);
+ #if defined(ASYNC_SERVER_DEREG)||defined(ASYNC_CLIENT_DEREG)
+ static int clist_deregister1(CONN *, struct clist *, bool_t);
+ #endif
! #if defined(ASYNC_CLIENT_DEREG)
! typedef struct async_dereg {
! struct async_dereg *forw;
! struct async_dereg *back;
! CONN c_conn;
! struct clist c_clist;
! } ASYNC;
! static void async_dereg_thread(caddr_t arg);
! extern pri_t minclsyspri; /* priority for taskq */
! static ASYNC rqueue;
! static kmutex_t at_mutex;
! static kcondvar_t at_cond;
! #endif
/*
* RDMA operations the RPCIB module exports
*/
static rdmaops_t rib_ops = {
rib_reachable,
*** 232,248 ****
rib_deregistermemsync,
rib_syncmem,
rib_reg_buf_alloc,
rib_reg_buf_free,
rib_send,
rib_send_resp,
rib_post_resp,
rib_post_recv,
rib_recv,
rib_read,
rib_write,
! rib_getinfo
};
/*
* RDMATF RPCIB plugin details
*/
--- 327,359 ----
rib_deregistermemsync,
rib_syncmem,
rib_reg_buf_alloc,
rib_reg_buf_free,
rib_send,
+ #if defined (CLNT_INTERRUPT_COAL)
+ rib_send_bl,
+ #endif
+ #if defined(ASYNC_SERVER_DEREG)
+ rib_send_nw,
+ #endif
rib_send_resp,
rib_post_resp,
rib_post_recv,
rib_recv,
rib_read,
rib_write,
! rib_getinfo,
! #ifdef SERVER_REG_CACHE
! rib_get_server_cache_buf,
! rib_free_server_cache_buf,
! #endif
! #ifdef DYNAMIC_CREDIT_CONTROL
! rib_get_resource_info,
! #endif
! #if defined(ASYNC_CLIENT_DEREG)
! insert_queue,
! #endif
};
/*
* RDMATF RPCIB plugin details
*/
*** 258,269 ****
static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
! static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
ibt_mr_hdl_t *, ibt_mr_desc_t *);
static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *);
static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
rib_qp_t **);
static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
rib_qp_t **);
--- 369,386 ----
static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
! #ifdef IB_FMR_SUP
! static rdma_stat rib_reg_mem_fmr(rib_hca_t *, caddr_t adsp, caddr_t,
! uint_t, ibt_mr_flags_t,
! ibt_mr_hdl_t *, ibt_ma_hdl_t *, ibt_pmr_desc_t *);
! #endif
! static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t, ibt_mr_flags_t,
ibt_mr_hdl_t *, ibt_mr_desc_t *);
+ static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
+ ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *);
static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
rib_qp_t **);
static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
rib_qp_t **);
*** 314,325 ****
* debugging in rpcib kernel module.
* Set it to values greater than 1 to control
* the amount of debugging messages required.
*/
int rib_debug = 0;
!
static int ats_running = 0;
int
_init(void)
{
int error;
--- 431,446 ----
* debugging in rpcib kernel module.
* Set it to values greater than 1 to control
* the amount of debugging messages required.
*/
int rib_debug = 0;
! #if defined(CLNT_POLL_CQ)
! int max_poll_count = 500;
! #endif
static int ats_running = 0;
+
+
int
_init(void)
{
int error;
*** 571,580 ****
--- 692,702 ----
static rdma_stat rib_rem_replylist(rib_qp_t *);
static int rib_remreply(rib_qp_t *, struct reply *);
static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
+
/*
* One CQ pair per HCA
*/
static rdma_stat
rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
*** 631,641 ****
rdma_stat status;
ibt_hca_portinfo_t *pinfop;
ibt_pd_flags_t pd_flags = IBT_PD_NO_FLAGS;
uint_t size, cq_size;
int i;
!
ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
if (ribstat->hcas == NULL)
ribstat->hcas = kmem_zalloc(ribstat->hca_count *
sizeof (rib_hca_t), KM_SLEEP);
--- 753,766 ----
rdma_stat status;
ibt_hca_portinfo_t *pinfop;
ibt_pd_flags_t pd_flags = IBT_PD_NO_FLAGS;
uint_t size, cq_size;
int i;
! #ifdef IB_FMR_SUP
! ibt_fmr_pool_attr_t fmr_attr;
! uint_t h_page_sz;
! #endif
ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
if (ribstat->hcas == NULL)
ribstat->hcas = kmem_zalloc(ribstat->hca_count *
sizeof (rib_hca_t), KM_SLEEP);
*** 744,754 ****
--- 869,943 ----
if (hca->send_pool == NULL) {
cmn_err(CE_WARN, "open_hcas: send buf pool failed\n");
rib_rbufpool_destroy(hca, RECV_BUFFER);
goto fail3;
}
+ #ifdef IB_FMR_SUP
+ /* Global FMR POOL */
+ bzero(&fmr_attr, sizeof (ibt_fmr_pool_attr_t));
+ h_page_sz = hca->hca_attrs.hca_page_sz * 1024;
+
+ fmr_attr.fmr_max_pages_per_fmr =
+ (IB_FMR_MAX_SIZE / h_page_sz) + 2;
+ fmr_attr.fmr_pool_size = MAX_BUFS * 2;
+ fmr_attr.fmr_dirty_watermark = IB_FMR_DIRTY_MARK;
+ fmr_attr.fmr_page_sz = h_page_sz;
+ fmr_attr.fmr_cache = B_FALSE;
+ fmr_attr.fmr_flags = IBT_MR_SLEEP |
+ IBT_MR_ENABLE_LOCAL_WRITE |
+ IBT_MR_ENABLE_REMOTE_READ |
+ IBT_MR_ENABLE_REMOTE_WRITE;
+ fmr_attr.fmr_func_hdlr = NULL;
+
+ if (rib_debug > 1) {
+ cmn_err(CE_NOTE, "open_hcas: ibt_create_fmr_pool:");
+ cmn_err(CE_NOTE, "fmr_page_sz %d, fmr_pool_sz %d, "
+ "max_pages_per_fmr %d", fmr_attr.fmr_page_sz,
+ fmr_attr.fmr_pool_size,
+ fmr_attr.fmr_max_pages_per_fmr);
+ }
+
+ ibt_status = ibt_create_fmr_pool(hca->hca_hdl, hca->pd_hdl,
+ &fmr_attr, &hca->fmr_pool);
+ if (ibt_status != IBT_SUCCESS) {
+ cmn_err(CE_WARN, "open_hcas: Global FMR pool creation "
+ "failed: %d\n", ibt_status);
+ rib_rbufpool_destroy(hca, RECV_BUFFER);
+ rib_rbufpool_destroy(hca, SEND_BUFFER);
+ goto fail3;
+ }
+ #endif
+ #ifdef SERVER_REG_CACHE
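+ /*
+ * Set up the per-HCA server-side registration cache: a kmem cache
+ * for the AVL nodes and an AVL tree of those nodes keyed by buffer
+ * length (see rib_get_server_cache_buf()).
+ */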
+ cmn_err(CE_NOTE, "Registration Cache enabled\n");
+ {
+ cache_avl_struct_t my_avl_node;
+ hca->server_side_cache =
+ kmem_cache_create("rib_server_side_cache",
+ sizeof (cache_avl_struct_t), 0,
+ NULL,
+ NULL,
+ rib_server_side_cache_reclaim,
+ hca, NULL, 0);
+ avl_create(&hca->avl_tree,
+ avl_compare,
+ sizeof (cache_avl_struct_t),
+ (uint_t)((uintptr_t)&my_avl_node.avl_link -
+ (uintptr_t)&my_avl_node));
+ rw_init(&hca->avl_rw_lock, NULL, RW_DRIVER, hca->iblock);
+ hca->avl_init = TRUE;
+
+ }
+ #endif
+
+ #if defined(ASYNC_CLIENT_DEREG)
+ rqueue.forw = rqueue.back = &rqueue;
+ mutex_init(&at_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&at_cond, NULL, CV_DEFAULT, NULL);
+ (void) thread_create(NULL, 0, async_dereg_thread, NULL, 0, &p0,
+ TS_RUN, minclsyspri);
+ #endif
/*
* Initialize the registered service list and
* the lock
*/
hca->service_list = NULL;
*** 886,895 ****
--- 1075,1162 ----
}
}
}
}
+ #if defined (CLNT_INTERRUPT_COAL)
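+ /*
+ * Reap a send wid that rib_send_bl() queued without
+ * IBT_WR_SEND_SIGNAL.  No work completion is ever generated for
+ * such a send, so success is assumed here and the send buffers
+ * and the wid are simply freed.
+ */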
+ static void
+ rib_scq_free(caddr_t widd)
+ {
+ struct send_wid *wd = (struct send_wid *)widd;
+ ibt_wc_t wc;
+ int i;
+ CONN *conn = qptoc(wd->qp);
+
+ wc.wc_status = RDMA_SUCCESS;
+ mutex_enter(&wd->sendwait_lock);
+ switch (wc.wc_status) {
+ case IBT_WC_SUCCESS:
+ wd->status = RDMA_SUCCESS;
+ break;
+ case IBT_WC_WR_FLUSHED_ERR:
+ wd->status = RDMA_FAILED;
+ break;
+ default:
+ /*
+ * RC Send Q Error Code Local state Remote State
+ * ==================== =========== ============
+ * IBT_WC_BAD_RESPONSE_ERR ERROR None
+ * IBT_WC_LOCAL_LEN_ERR ERROR None
+ * IBT_WC_LOCAL_CHAN_OP_ERR ERROR None
+ * IBT_WC_LOCAL_PROTECT_ERR ERROR None
+ * IBT_WC_MEM_WIN_BIND_ERR ERROR None
+ * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR
+ * IBT_WC_REMOTE_ACCESS_ERR ERROR ERROR
+ * IBT_WC_REMOTE_OP_ERR ERROR ERROR
+ * IBT_WC_RNR_NAK_TIMEOUT_ERR ERROR None
+ * IBT_WC_TRANS_TIMEOUT_ERR ERROR None
+ * IBT_WC_WR_FLUSHED_ERR None None
+ */
+ #ifdef DEBUG
+ if (rib_debug > 1) {
+ if (wc.wc_status != IBT_WC_SUCCESS) {
+ cmn_err(CE_NOTE, "rib_clnt_scq_handler: "
+ "WR completed in error, wc.wc_status:%d, "
+ "wc_id:%llx\n", wc.wc_status, (longlong_t)wc.wc_id);
+ }
+ }
+ #endif
+ /*
+ * Channel in error state. Set connection to
+ * ERROR and cleanup will happen either from
+ * conn_release or from rib_conn_get
+ */
+ wd->status = RDMA_FAILED;
+ mutex_enter(&conn->c_lock);
+ if (conn->c_state != C_DISCONN_PEND)
+ conn->c_state = C_ERROR;
+ mutex_exit(&conn->c_lock);
+ break;
+ }
+ if (wd->cv_sig == 1) {
+ /*
+ * Notify poster
+ */
+ cv_signal(&wd->wait_cv);
+ mutex_exit(&wd->sendwait_lock);
+ } else {
+ /*
+ * Poster not waiting for notification.
+ * Free the send buffers and send_wid
+ */
+ for (i = 0; i < wd->nsbufs; i++) {
+ rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
+ (void *)(uintptr_t)wd->sbufaddr[i]);
+ }
+ mutex_exit(&wd->sendwait_lock);
+ (void) rib_free_sendwait(wd);
+ }
+ }
+ #endif
+
/* ARGSUSED */
static void
rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
{
ibt_status_t ibt_status;
*** 919,929 ****
wc.wc_status, (longlong_t)wc.wc_id);
}
#endif
if (wc.wc_id != NULL) { /* XXX NULL possible ???? */
struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id;
!
mutex_enter(&wd->sendwait_lock);
if (wd->cv_sig == 1) {
/*
* Update completion status and notify poster
*/
--- 1186,1217 ----
wc.wc_status, (longlong_t)wc.wc_id);
}
#endif
if (wc.wc_id != NULL) { /* XXX NULL possible ???? */
struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id;
! #ifdef ASYNC_SERVER_DEREG
! if (wd->c1) {
! (void) clist_deregister1((CONN *)wd->c,
! (struct clist *)wd->c1, TRUE);
! #ifdef SERVER_REG_CACHE
! RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c,
! (rib_lrc_entry_t *)(((struct clist *)wd->c1)->long_reply_buf));
! #else
! if (wd->l1)
! kmem_free((void *)(uintptr_t)
! ((struct clist *)wd->c1)->c_saddr, wd->l1);
! #endif
! kmem_free((void *)(wd->c1), wd->wl * sizeof (struct clist));
! }
! if (wd->c2) {
! (void) clist_deregister1((CONN *)wd->c,
! (struct clist *)wd->c2, TRUE);
! #ifdef SERVER_REG_CACHE
! RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c,
! (rib_lrc_entry_t *)(((struct clist *)wd->c2)->long_reply_buf));
! #else
! if (wd->l2)
! kmem_free((void *)(uintptr_t)
! ((struct clist *)wd->c2)->c_saddr, wd->l2);
! #endif
! kmem_free((void *)(wd->c2), wd->rl * sizeof (struct clist));
! }
! #endif
mutex_enter(&wd->sendwait_lock);
if (wd->cv_sig == 1) {
/*
* Update completion status and notify poster
*/
*** 958,988 ****
{
rib_qp_t *qp;
ibt_status_t ibt_status;
ibt_wc_t wc;
struct recv_wid *rwid;
/*
* Re-enable cq notify here to avoid missing any
* completion queue notification.
*/
(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
ibt_status = IBT_SUCCESS;
while (ibt_status != IBT_CQ_EMPTY) {
bzero(&wc, sizeof (wc));
ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
if (ibt_status != IBT_SUCCESS)
return;
!
rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
qp = rwid->qp;
if (wc.wc_status == IBT_WC_SUCCESS) {
XDR inxdrs, *xdrs;
uint_t xid, vers, op, find_xid = 0;
struct reply *r;
CONN *conn = qptoc(qp);
xdrs = &inxdrs;
xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
wc.wc_bytes_xfer, XDR_DECODE);
/*
--- 1246,1302 ----
{
rib_qp_t *qp;
ibt_status_t ibt_status;
ibt_wc_t wc;
struct recv_wid *rwid;
+ #if defined(CLNT_POLL_CQ)
+ uint32_t count = 0;
+ #endif
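+ /*
+ * With CLNT_POLL_CQ the handler spins on ibt_poll_cq() for up to
+ * max_poll_count empty polls before re-arming the CQ notification,
+ * trading CPU for lower completion latency.
+ */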
/*
* Re-enable cq notify here to avoid missing any
* completion queue notification.
*/
+ #if !defined(CLNT_POLL_CQ)
(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
+ #endif
ibt_status = IBT_SUCCESS;
while (ibt_status != IBT_CQ_EMPTY) {
+ #if defined(CLNT_POLL_CQ)
+ poll_cq_again:
+ #endif
bzero(&wc, sizeof (wc));
ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
+ #if defined(CLNT_POLL_CQ)
+ if (ibt_status == IBT_CQ_EMPTY) {
+ count++;
+ if (count == max_poll_count) {
+ (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
+ return;
+ }
+ goto poll_cq_again;
+ }
+ #endif
if (ibt_status != IBT_SUCCESS)
+ #if defined(CLNT_POLL_CQ)
+ {
+ (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
+ #endif
return;
! #if defined(CLNT_POLL_CQ)
! }
! count = 0;
! #endif
rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
qp = rwid->qp;
if (wc.wc_status == IBT_WC_SUCCESS) {
XDR inxdrs, *xdrs;
uint_t xid, vers, op, find_xid = 0;
struct reply *r;
CONN *conn = qptoc(qp);
+ uint32_t rdma_credit = 0;
xdrs = &inxdrs;
xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
wc.wc_bytes_xfer, XDR_DECODE);
/*
*** 991,1000 ****
--- 1305,1315 ----
*/
xid = *(uint32_t *)(uintptr_t)rwid->addr;
/* Skip xid and set the xdr position accordingly. */
XDR_SETPOS(xdrs, sizeof (uint32_t));
(void) xdr_u_int(xdrs, &vers);
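+ /* An rdma_credit field now follows the version in the RPC/RDMA header. */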
+ (void) xdr_u_int(xdrs, &rdma_credit);
(void) xdr_u_int(xdrs, &op);
XDR_DESTROY(xdrs);
if (vers != RPCRDMA_VERS) {
/*
* Invalid RPC/RDMA version. Cannot interoperate.
*** 1108,1124 ****
--- 1423,1443 ----
s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
qp = s_recvp->qp;
conn = qptoc(qp);
mutex_enter(&qp->posted_rbufs_lock);
qp->n_posted_rbufs--;
+ #if defined(MEASURE_POOL_DEPTH)
+ rib_posted_rbufs(preposted_rbufs - qp->n_posted_rbufs);
+ #endif
if (qp->n_posted_rbufs == 0)
cv_signal(&qp->posted_rbufs_cv);
mutex_exit(&qp->posted_rbufs_lock);
if (wc.wc_status == IBT_WC_SUCCESS) {
XDR inxdrs, *xdrs;
uint_t xid, vers, op;
+ uint32_t rdma_credit;
xdrs = &inxdrs;
/* s_recvp->vaddr stores data */
xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
wc.wc_bytes_xfer, XDR_DECODE);
*** 1129,1138 ****
--- 1448,1458 ----
*/
xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
/* Skip xid and set the xdr position accordingly. */
XDR_SETPOS(xdrs, sizeof (uint32_t));
if (!xdr_u_int(xdrs, &vers) ||
+ !xdr_u_int(xdrs, &rdma_credit) ||
!xdr_u_int(xdrs, &op)) {
rib_rbuf_free(conn, RECV_BUFFER,
(void *)(uintptr_t)s_recvp->vaddr);
XDR_DESTROY(xdrs);
#ifdef DEBUG
*** 1338,1347 ****
--- 1658,1668 ----
static rdma_stat
rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
{
rib_qp_t *kqp = NULL;
CONN *conn;
+ rdma_clnt_cred_ctrl_t *cc_info;
ASSERT(qp != NULL);
*qp = NULL;
kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
*** 1365,1374 ****
--- 1686,1710 ----
mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
+ #if defined (CLNT_INTERRUPT_COAL)
+ kqp->rdmaconn.c_count = 0;
+ conn->c_count = 0;
+ bzero(&kqp->wd, sizeof(struct send_wid));
+ kqp->wd.forw = kqp->wd.back = &kqp->wd;
+ #endif
+ /*
+ * Initialize the client credit control
+ * portion of the rdmaconn struct.
+ */
+ kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT;
+ cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
+ cc_info->clnt_cc_granted_ops = 0;
+ cc_info->clnt_cc_in_flight_ops = 0;
+ cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL);
*qp = kqp;
return (RDMA_SUCCESS);
}
*** 1378,1387 ****
--- 1714,1724 ----
{
rib_qp_t *kqp = NULL;
ibt_chan_sizes_t chan_sizes;
ibt_rc_chan_alloc_args_t qp_attr;
ibt_status_t ibt_status;
+ rdma_srv_cred_ctrl_t *cc_info;
ASSERT(qp != NULL);
*qp = NULL;
kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
*** 1439,1449 ****
--- 1776,1799 ----
/*
* Set the private data area to qp to be used in callbacks
*/
ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
kqp->rdmaconn.c_state = C_CONNECTED;
+
+ /*
+ * Initialize the server credit control
+ * portion of the rdmaconn struct.
+ */
+ kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
+ cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
+ cc_info->srv_cc_buffers_granted = preposted_rbufs;
+ cc_info->srv_cc_cur_buffers_used = 0;
+ cc_info->srv_cc_posted = preposted_rbufs;
+
*qp = kqp;
+
+ num_clients++;
return (RDMA_SUCCESS);
fail:
if (kqp)
kmem_free(kqp, sizeof (rib_qp_t));
*** 1722,1733 ****
qp_attr.rc_flags = IBT_WR_SIGNALED;
chan_args.oc_path = path;
chan_args.oc_cm_handler = rib_clnt_cm_handler;
chan_args.oc_cm_clnt_private = (void *)rib_stat;
! chan_args.oc_rdma_ra_out = 1;
! chan_args.oc_rdma_ra_in = 1;
chan_args.oc_path_retry_cnt = 2;
chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
refresh:
rw_enter(&hca->state_lock, RW_READER);
--- 2072,2083 ----
qp_attr.rc_flags = IBT_WR_SIGNALED;
chan_args.oc_path = path;
chan_args.oc_cm_handler = rib_clnt_cm_handler;
chan_args.oc_cm_clnt_private = (void *)rib_stat;
! chan_args.oc_rdma_ra_out = 4;
! chan_args.oc_rdma_ra_in = 4;
chan_args.oc_path_retry_cnt = 2;
chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
refresh:
rw_enter(&hca->state_lock, RW_READER);
*** 1900,1909 ****
--- 2250,2269 ----
kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
}
if (conn->c_laddr.buf != NULL) {
kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
}
+
+ /*
+ * Credit control cleanup.
+ */
+ if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
+ rdma_clnt_cred_ctrl_t *cc_info;
+ cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
+ cv_destroy(&cc_info->clnt_cc_cv);
+ }
+
kmem_free(qp, sizeof (rib_qp_t));
/*
* If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
* then the hca is no longer being used.
*** 1925,1937 ****
--- 2285,2321 ----
}
rw_exit(&hca->srv_conn_list.conn_lock);
}
rw_exit(&hca->state_lock);
}
+
+ num_clients--;
return (RDMA_SUCCESS);
}
+ #ifdef DYNAMIC_CREDIT_CONTROL
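+ /*
+ * Report the current client count and the free receive buffers so
+ * that the caller can size the credits granted to each connection.
+ */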
+ void
+ rib_get_resource_info(CONN *conn, int *current_clients, int *avail_bufs)
+ {
+ rib_qp_t *qp = ctoqp(conn);
+ rib_hca_t *hca = qp->hca;
+ rib_bufpool_t *rbp = NULL;
+ bufpool_t *bp;
+
+ is_server = 1;
+ rbp = hca->recv_pool;
+
+ if (rbp == NULL)
+ *avail_bufs = 0;
+ else {
+ bp = rbp->bpool;
+ *avail_bufs = bp->buffree;
+ }
+
+ *current_clients = num_clients;
+ }
+ #endif
+
/*
* Wait for send completion notification. Only on receiving a
* notification be it a successful or error completion, free the
* send_wid.
*/
*** 2062,2073 ****
* Send buffers are freed here only in case of error in posting
* on QP. If the post succeeded, the send buffers are freed upon
* send completion in rib_sendwait() or in the scq_handler.
*/
rdma_stat
rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
! int send_sig, int cv_sig)
{
struct send_wid *wdesc;
struct clist *clp;
ibt_status_t ibt_status = IBT_SUCCESS;
rdma_stat ret = RDMA_SUCCESS;
--- 2446,2462 ----
* Send buffers are freed here only in case of error in posting
* on QP. If the post succeeded, the send buffers are freed upon
* send completion in rib_sendwait() or in the scq_handler.
*/
rdma_stat
+ #if defined(ASYNC_SERVER_DEREG)
rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
! int send_sig, int cv_sig, caddr_t c, caddr_t c1, int l1, caddr_t c2,
! int l2, int l3, int l4, caddr_t *swid)
! #else
! rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
! int send_sig, int cv_sig, caddr_t *swid)
! #endif
{
struct send_wid *wdesc;
struct clist *clp;
ibt_status_t ibt_status = IBT_SUCCESS;
rdma_stat ret = RDMA_SUCCESS;
*** 2100,2114 ****
--- 2489,2514 ----
if (send_sig) {
/* Set SEND_SIGNAL flag. */
tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
wdesc = rib_init_sendwait(msgid, cv_sig, qp);
+ *swid = (caddr_t)wdesc;
} else {
tx_wr.wr_flags = IBT_WR_NO_FLAGS;
wdesc = rib_init_sendwait(msgid, 0, qp);
+ *swid = (caddr_t)wdesc;
}
wdesc->nsbufs = nds;
+ #if defined(ASYNC_SERVER_DEREG)
+ wdesc->c = c;
+ wdesc->c1 = c1;
+ wdesc->c2 = c2;
+ wdesc->l1 = l1;
+ wdesc->l2 = l2;
+ wdesc->wl = l3;
+ wdesc->rl = l4;
+ #endif
for (i = 0; i < nds; i++) {
wdesc->sbufaddr[i] = sgl[i].ds_va;
}
tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
*** 2161,2181 ****
}
return (RDMA_SUCCESS);
}
rdma_stat
rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
{
rdma_stat ret;
/* send-wait & cv_signal */
! ret = rib_send_and_wait(conn, cl, msgid, 1, 1);
return (ret);
}
!
/*
* Server interface (svc_rdma_ksend).
* Send RPC reply and wait for RDMA_DONE.
*/
rdma_stat
--- 2561,2632 ----
}
return (RDMA_SUCCESS);
}
+ #if defined (CLNT_INTERRUPT_COAL)
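+ /*
+ * Interrupt-coalescing send path: only every (preposted_rbufs / 2)th
+ * send is posted with IBT_WR_SEND_SIGNAL; unsignaled wids are queued
+ * on qp->wd and reaped through rib_scq_free() when the next signaled
+ * send goes out.
+ */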
rdma_stat
+ rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid)
+ {
+ rdma_stat ret;
+ struct send_wid *sd, dlist;
+ rib_qp_t *qp = ctoqp(conn);
+ caddr_t wd;
+ mutex_enter(&conn->c_lock);
+ if ((conn->c_count + 1) >= (preposted_rbufs / 2)) {
+ conn->c_count = 0;
+ dlist.forw = dlist.back = &dlist;
+ while (qp->wd.forw != &qp->wd) {
+ sd = qp->wd.forw;
+ remque(sd);
+ insque(sd, &dlist);
+ }
+ mutex_exit(&conn->c_lock);
+ ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
+ while (dlist.forw != &dlist) {
+ sd = dlist.forw;
+ remque(dlist.forw);
+ rib_scq_free((caddr_t)sd);
+ }
+ } else {
+ mutex_exit(&conn->c_lock);
+ wd = NULL;
+ ret = rib_send_and_wait(conn, cl, msgid, 0, 0, &wd);
+ mutex_enter(&conn->c_lock);
+ conn->c_count++;
+ insque(wd, &qp->wd);
+ mutex_exit(&conn->c_lock);
+ }
+ return (ret);
+ }
+ #endif
+
+ rdma_stat
rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
{
rdma_stat ret;
+ caddr_t wd;
+ /* send-wait & cv_signal */
+ #if defined(ASYNC_SERVER_DEREG)
+ ret = rib_send_and_wait(conn, cl, msgid, 1, 1, 0, 0, 0, 0, 0, 0, 0, &wd);
+ #else
+ ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
+ #endif
+ return (ret);
+ }
+ #if defined(ASYNC_SERVER_DEREG)
+ rdma_stat
+ rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid, caddr_t c,
+ caddr_t c1, int c2, caddr_t c3, int c4, int c5, int c6)
+ {
+ rdma_stat ret;
+ caddr_t wid;
/* send-wait & cv_signal */
! ret = rib_send_and_wait(conn, cl, msgid, 1, 0, c, c1, c2, c3, c4,
! c5, c6, &wid);
return (ret);
}
! #endif
/*
* Server interface (svc_rdma_ksend).
* Send RPC reply and wait for RDMA_DONE.
*/
rdma_stat
*** 2182,2198 ****
rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
{
rdma_stat ret = RDMA_SUCCESS;
struct rdma_done_list *rd;
clock_t timout, cv_wait_ret;
rib_qp_t *qp = ctoqp(conn);
mutex_enter(&qp->rdlist_lock);
rd = rdma_done_add(qp, msgid);
/* No cv_signal (whether send-wait or no-send-wait) */
! ret = rib_send_and_wait(conn, cl, msgid, 1, 0);
if (ret != RDMA_SUCCESS) {
#ifdef DEBUG
cmn_err(CE_WARN, "rib_send_resp: send_and_wait "
"failed, msgid %u, qp %p", msgid, (void *)qp);
#endif
--- 2633,2654 ----
rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
{
rdma_stat ret = RDMA_SUCCESS;
struct rdma_done_list *rd;
clock_t timout, cv_wait_ret;
+ caddr_t wid;
rib_qp_t *qp = ctoqp(conn);
mutex_enter(&qp->rdlist_lock);
rd = rdma_done_add(qp, msgid);
/* No cv_signal (whether send-wait or no-send-wait) */
! #if defined(ASYNC_SERVER_DEREG)
! ret = rib_send_and_wait(conn, cl, msgid, 1, 0, 0, 0, 0, 0, 0, 0, 0,
! &wid);
! #else
! ret = rib_send_and_wait(conn, cl, msgid, 1, 0, &wid);
! #endif
if (ret != RDMA_SUCCESS) {
#ifdef DEBUG
cmn_err(CE_WARN, "rib_send_resp: send_and_wait "
"failed, msgid %u, qp %p", msgid, (void *)qp);
#endif
*** 2496,2506 ****
*/
rdma_stat
rib_write(CONN *conn, struct clist *cl, int wait)
{
ibt_send_wr_t tx_wr;
- int nds;
int cv_sig;
ibt_wr_ds_t sgl[DSEG_MAX];
struct send_wid *wdesc;
ibt_status_t ibt_status;
rdma_stat ret = RDMA_SUCCESS;
--- 2952,2961 ----
*** 2509,2538 ****
if (cl == NULL) {
cmn_err(CE_WARN, "rib_write: NULL clist\n");
return (RDMA_FAILED);
}
bzero(&tx_wr, sizeof (ibt_send_wr_t));
- /*
- * Remote address is at the head chunk item in list.
- */
tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_daddr;
tx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_dmemhandle.mrc_rmr; /* rkey */
- nds = 0;
- while (cl != NULL) {
- if (nds >= DSEG_MAX) {
- cmn_err(CE_WARN, "rib_write: DSEG_MAX too small!");
- return (RDMA_FAILED);
- }
- sgl[nds].ds_va = cl->c_saddr;
- sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
- sgl[nds].ds_len = cl->c_len;
- cl = cl->c_next;
- nds++;
- }
-
if (wait) {
tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
cv_sig = 1;
} else {
tx_wr.wr_flags = IBT_WR_NO_FLAGS;
--- 2964,2983 ----
if (cl == NULL) {
cmn_err(CE_WARN, "rib_write: NULL clist\n");
return (RDMA_FAILED);
}
+
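+ /*
+ * Issue one RDMA write work request per clist element, skipping
+ * zero-length chunks; each chunk carries its own remote address
+ * and rkey.
+ */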
+ while (cl != NULL) {
+ if (cl->c_len > 0) {
bzero(&tx_wr, sizeof (ibt_send_wr_t));
tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_daddr;
tx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_dmemhandle.mrc_rmr; /* rkey */
+ sgl[0].ds_va = cl->c_saddr;
+ sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
+ sgl[0].ds_len = cl->c_len;
if (wait) {
tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
cv_sig = 1;
} else {
tx_wr.wr_flags = IBT_WR_NO_FLAGS;
*** 2541,2551 ****
wdesc = rib_init_sendwait(0, cv_sig, qp);
tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
tx_wr.wr_opcode = IBT_WRC_RDMAW;
tx_wr.wr_trans = IBT_RC_SRV;
! tx_wr.wr_nds = nds;
tx_wr.wr_sgl = sgl;
mutex_enter(&conn->c_lock);
if (conn->c_state & C_CONNECTED) {
ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
--- 2986,2996 ----
wdesc = rib_init_sendwait(0, cv_sig, qp);
tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
tx_wr.wr_opcode = IBT_WRC_RDMAW;
tx_wr.wr_trans = IBT_RC_SRV;
! tx_wr.wr_nds = 1;
tx_wr.wr_sgl = sgl;
mutex_enter(&conn->c_lock);
if (conn->c_state & C_CONNECTED) {
ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
*** 2565,2574 ****
--- 3010,3022 ----
ret = rib_sendwait(qp, wdesc);
if (ret != 0) {
return (ret);
}
}
+ }
+ cl = cl->c_next;
+ }
return (RDMA_SUCCESS);
}
/*
* RDMA Read a buffer from the remote address.
*** 2683,2693 ****
rpcib_state_t *ribstat;
rib_hca_t *hca;
rdma_stat status = RDMA_SUCCESS;
int i;
struct clist cl;
! rdma_buf_t rdbuf;
void *buf = NULL;
ibt_cm_req_rcv_t cm_req_rcv;
CONN *conn;
ibt_status_t ibt_status;
ibt_ar_t ar_query, ar_result;
--- 3131,3141 ----
rpcib_state_t *ribstat;
rib_hca_t *hca;
rdma_stat status = RDMA_SUCCESS;
int i;
struct clist cl;
! rdma_buf_t rdbuf = {0};
void *buf = NULL;
ibt_cm_req_rcv_t cm_req_rcv;
CONN *conn;
ibt_status_t ibt_status;
ibt_ar_t ar_query, ar_result;
*** 2768,2779 ****
}
}
#endif
ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
! ret_args->cm_ret.rep.cm_rdma_ra_out = 1;
! ret_args->cm_ret.rep.cm_rdma_ra_in = 1;
ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
/*
* Pre-posts RECV buffers
*/
--- 3216,3227 ----
}
}
#endif
ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
! ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
! ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
/*
* Pre-posts RECV buffers
*/
*** 3693,3736 ****
return (0);
}
rdma_stat
! rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
struct mrc *buf_handle)
{
ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */
ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */
rdma_stat status;
rib_hca_t *hca = (ctoqp(conn))->hca;
/*
* Note: ALL buffer pools use the same memory type RDMARW.
*/
! status = rib_reg_mem(hca, buf, buflen, 0, &mr_hdl, &mr_desc);
if (status == RDMA_SUCCESS) {
buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
} else {
buf_handle->mrc_linfo = NULL;
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
return (status);
}
static rdma_stat
! rib_reg_mem(rib_hca_t *hca, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
{
ibt_mr_attr_t mem_attr;
ibt_status_t ibt_status;
-
mem_attr.mr_vaddr = (uintptr_t)buf;
mem_attr.mr_len = (ib_msglen_t)size;
! mem_attr.mr_as = NULL;
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
IBT_MR_ENABLE_WINDOW_BIND | spec;
rw_enter(&hca->state_lock, RW_READER);
--- 4141,4289 ----
return (0);
}
rdma_stat
! rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
struct mrc *buf_handle)
{
ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */
+ #ifdef IB_FMR_SUP
+ ibt_pmr_desc_t pmr_desc; /* vaddr, lkey, rkey */
+ ibt_ma_hdl_t ma_hdl = NULL;
+ #endif
ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */
rdma_stat status;
rib_hca_t *hca = (ctoqp(conn))->hca;
/*
* Note: ALL buffer pools use the same memory type RDMARW.
*/
! #ifdef IB_FMR_SUP
! status = rib_reg_mem_fmr(hca, adsp, buf, buflen, 0, &mr_hdl, &ma_hdl,
! &pmr_desc);
if (status == RDMA_SUCCESS) {
buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+ buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+ buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+ buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+ goto ret_stat;
+ } else {
+ buf_handle->mrc_linfo = NULL;
+ buf_handle->mrc_lma = NULL;
+ buf_handle->mrc_lmr = 0;
+ buf_handle->mrc_rmr = 0;
+ }
+ #endif
+ status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
+ if (status == RDMA_SUCCESS) {
+ buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
} else {
buf_handle->mrc_linfo = NULL;
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
+ ret_stat:
return (status);
}
+ #ifdef IB_FMR_SUP
static rdma_stat
! rib_reg_mem_fmr(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
! ibt_mr_hdl_t *mr_hdlp, ibt_ma_hdl_t *ma_hdlp, ibt_pmr_desc_t *pmr_descp)
! {
! ibt_va_attr_t va_attr;
! ibt_phys_buf_t *paddr_list;
! uint_t paddr_list_len, num_paddr;
! size_t buf_sz = 0;
! ibt_pmr_attr_t pmr_attr;
! ib_memlen_t paddr_offset;
! ibt_status_t ibt_status;
! uint_t h_page_sz;
! if (adsp)
! return (RDMA_FAILED);
! bzero(&va_attr, sizeof (ibt_va_attr_t));
! va_attr.va_vaddr = (ib_vaddr_t)buf;
! va_attr.va_len = size;
! va_attr.va_as = (struct as *)(caddr_t)adsp;
! va_attr.va_flags = IBT_VA_FMR | IBT_VA_SLEEP;
! if (spec == IBT_MR_NONCOHERENT)
! va_attr.va_flags |= IBT_VA_NONCOHERENT;
! va_attr.va_phys_buf_min = va_attr.va_phys_buf_max = 0;
!
! h_page_sz = hca->hca_attrs.hca_page_sz * 1024;
! paddr_list_len = (size / h_page_sz) + 2;
! paddr_list = (ibt_phys_buf_t *)kmem_zalloc(sizeof (ibt_phys_buf_t) *
! paddr_list_len, KM_SLEEP);
!
! if (rib_debug > 0) {
! cmn_err(CE_NOTE, "fmr: vaddr %p, size %d, paddr_list_len %d\n",
! buf, size, paddr_list_len);
! }
!
! ibt_status = ibt_map_mem_area(hca->hca_hdl, &va_attr, paddr_list_len,
! paddr_list, &num_paddr, &buf_sz, &paddr_offset, ma_hdlp);
! if (ibt_status != IBT_SUCCESS) {
! cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_map_mem_area failed: "
! "status %d", ibt_status);
! kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
! return (RDMA_FAILED);
! }
!
! if (rib_debug > 0) {
! cmn_err(CE_NOTE,"fmr: p_laddr %p, p_size %d, buf_sz %d, p_ofset %llX\n",
! paddr_list[0].p_laddr, paddr_list[0].p_size, buf_sz,
! paddr_offset);
! cmn_err(CE_NOTE,"fmr: ibt_map_mem_area: ret %d, num_paddr %d, spec %d\n",
! ibt_status, num_paddr, spec);
! }
!
! bzero(&pmr_attr, sizeof (ibt_pmr_attr_t));
! pmr_attr.pmr_iova = (ib_vaddr_t)buf;
! pmr_attr.pmr_len = size;
! pmr_attr.pmr_num_buf = num_paddr;
! pmr_attr.pmr_buf_sz = buf_sz;
! pmr_attr.pmr_buf_list = paddr_list;
! pmr_attr.pmr_offset = paddr_offset;
! pmr_attr.pmr_flags = spec;
! pmr_attr.pmr_ma = *ma_hdlp;
!
! ibt_status = ibt_register_physical_fmr(hca->hca_hdl, hca->fmr_pool,
! &pmr_attr, mr_hdlp, pmr_descp);
! if (ibt_status != IBT_SUCCESS) {
! cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_register_physical_fmr "
! "failed: status %d", ibt_status);
! (void) ibt_unmap_mem_area(hca->hca_hdl, *ma_hdlp);
! *ma_hdlp=NULL;
! kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
! return (RDMA_FAILED);
! }
!
! if (rib_debug > 0) {
! cmn_err(CE_NOTE,"fmr: rkey: 0x%lX lkey: 0x%lX, iova: %p, fmr_hdl %p \n",
! pmr_descp->pmd_rkey, pmr_descp->pmd_lkey,
! pmr_descp->pmd_iova, *mr_hdlp);
! }
!
! kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
!
! return (RDMA_SUCCESS);
!
! }
!
! #endif
! static rdma_stat
! rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
{
ibt_mr_attr_t mem_attr;
ibt_status_t ibt_status;
mem_attr.mr_vaddr = (uintptr_t)buf;
mem_attr.mr_len = (ib_msglen_t)size;
! mem_attr.mr_as = (struct as *)(caddr_t)adsp;
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
IBT_MR_ENABLE_WINDOW_BIND | spec;
rw_enter(&hca->state_lock, RW_READER);
*** 3751,3808 ****
}
return (RDMA_SUCCESS);
}
rdma_stat
! rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle)
{
ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */
ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */
rdma_stat status;
rib_hca_t *hca = (ctoqp(conn))->hca;
/*
* Non-coherent memory registration.
*/
! status = rib_reg_mem(hca, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
&mr_desc);
if (status == RDMA_SUCCESS) {
buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
} else {
buf_handle->mrc_linfo = NULL;
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
return (status);
}
/* ARGSUSED */
rdma_stat
rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
{
rib_hca_t *hca = (ctoqp(conn))->hca;
-
/*
* Allow memory deregistration even if HCA is
* getting detached. Need all outstanding
* memory registrations to be deregistered
* before HCA_DETACH_EVENT can be accepted.
*/
(void) ibt_deregister_mr(hca->hca_hdl,
(ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
return (RDMA_SUCCESS);
}
/* ARGSUSED */
rdma_stat
rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
RIB_SYNCMEM_HANDLE sync_handle)
{
(void) rib_deregistermem(conn, buf, buf_handle);
return (RDMA_SUCCESS);
}
--- 4304,4463 ----
}
return (RDMA_SUCCESS);
}
rdma_stat
! rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
! #ifdef SERVER_REG_CACHE
! struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc)
! #else
struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle)
+ #endif
{
ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */
+ #ifdef IB_FMR_SUP
+ ibt_pmr_desc_t pmr_desc; /* vaddr, lkey, rkey */
+ ibt_ma_hdl_t ma_hdl = NULL;
+ #endif
+ #ifdef SERVER_REG_CACHE
+ rib_lrc_entry_t *l;
+ #endif
ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */
rdma_stat status;
rib_hca_t *hca = (ctoqp(conn))->hca;
/*
* Non-coherent memory registration.
*/
! #ifdef SERVER_REG_CACHE
! l = (rib_lrc_entry_t *)lrc;
! if (l) {
! if (l->registered) {
! buf_handle->mrc_linfo = (uintptr_t)l->lrc_mhandle.mrc_linfo;
! buf_handle->mrc_lmr = (uint32_t)l->lrc_mhandle.mrc_lmr;
! buf_handle->mrc_rmr = (uint32_t)l->lrc_mhandle.mrc_rmr;
! #ifdef IB_FMR_SUP
! buf_handle->mrc_lma = (uintptr_t)l->lrc_mhandle.mrc_lma;
! #endif
! *sync_handle = (RIB_SYNCMEM_HANDLE)l->lrc_mhandle.mrc_linfo;
! return (RDMA_SUCCESS);
! } else {
! /* Always register the whole buffer */
! buf = (caddr_t)l->lrc_buf;
! buflen = l->lrc_len;
! }
! }
! #endif
! #ifdef IB_FMR_SUP
! status = rib_reg_mem_fmr(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
! &ma_hdl, &pmr_desc);
! if (status == RDMA_SUCCESS) {
! buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
! buf_handle->mrc_lma = (uintptr_t)ma_hdl;
! buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
! buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
! *sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
! #ifdef SERVER_REG_CACHE
! if (l) {
! l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
! l->lrc_mhandle.mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
! l->lrc_mhandle.mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
! l->registered = TRUE;
! l->lrc_mhandle.mrc_lma = (uintptr_t)ma_hdl;
! }
! #endif
! goto ret_stat;
!
! } else {
! if (rib_debug > 1)
! cmn_err(CE_WARN, "fmr reg failed for buffer %p of "
! "length %d\n", buf, buflen);
! buf_handle->mrc_linfo = NULL;
! buf_handle->mrc_lma = NULL;
! buf_handle->mrc_lmr = 0;
! buf_handle->mrc_rmr = 0;
! }
! #endif
! status = rib_reg_mem(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
&mr_desc);
if (status == RDMA_SUCCESS) {
+ #ifdef SERVER_REG_CACHE
+ if (l) {
+ l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
+ l->lrc_mhandle.mrc_lmr = (uint32_t)mr_desc.md_lkey;
+ l->lrc_mhandle.mrc_rmr = (uint32_t)mr_desc.md_rkey;
+ l->registered = TRUE;
+ }
+ #endif
buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
} else {
buf_handle->mrc_linfo = NULL;
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
+ ret_stat:
return (status);
}
/* ARGSUSED */
rdma_stat
rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
{
+ #ifdef IB_FMR_SUP
+ ibt_status_t ibt_status;
+ #endif
rib_hca_t *hca = (ctoqp(conn))->hca;
/*
* Allow memory deregistration even if HCA is
* getting detached. Need all outstanding
* memory registrations to be deregistered
* before HCA_DETACH_EVENT can be accepted.
*/
+ #ifdef IB_FMR_SUP
+ if (buf_handle.mrc_lma) {
+ ibt_status = ibt_unmap_mem_area(hca->hca_hdl,
+ (ibt_ma_hdl_t)buf_handle.mrc_lma);
+ if (ibt_status != IBT_SUCCESS) {
+ cmn_err(CE_WARN, "rib_deregistermem: "
+ "ibt_unmap_mem_area failed: %d", ibt_status);
+ return (RDMA_FAILED);
+ }
+
+ ibt_status = ibt_deregister_fmr(hca->hca_hdl,
+ (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+ if (ibt_status != IBT_SUCCESS)
+ return (RDMA_FAILED);
+ return (RDMA_SUCCESS);
+ }
+ #endif
(void) ibt_deregister_mr(hca->hca_hdl,
(ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
return (RDMA_SUCCESS);
}
/* ARGSUSED */
rdma_stat
rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
+ #ifdef SERVER_REG_CACHE
+ RIB_SYNCMEM_HANDLE sync_handle, void *lrc)
+ #else
RIB_SYNCMEM_HANDLE sync_handle)
+ #endif
{
+ #ifdef SERVER_REG_CACHE
+ rib_lrc_entry_t *l = (rib_lrc_entry_t *)lrc;
+
+ if (l && l->registered)
+ return (RDMA_SUCCESS);
+ #endif
+
+
(void) rib_deregistermem(conn, buf, buf_handle);
return (RDMA_SUCCESS);
}
*** 3877,3895 ****
num * sizeof (void *), KM_SLEEP);
mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
bp->numelems = num;
switch (ptype) {
case SEND_BUFFER:
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
- /* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
bp->rsize = RPC_MSG_SZ;
break;
case RECV_BUFFER:
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
- /* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
bp->rsize = RPC_BUF_SIZE;
break;
default:
goto fail;
}
--- 4532,4549 ----
num * sizeof (void *), KM_SLEEP);
mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
bp->numelems = num;
+
switch (ptype) {
case SEND_BUFFER:
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
bp->rsize = RPC_MSG_SZ;
break;
case RECV_BUFFER:
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
bp->rsize = RPC_BUF_SIZE;
break;
default:
goto fail;
}
*** 3901,3914 ****
bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
sizeof (ibt_mr_hdl_t), KM_SLEEP);
rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
sizeof (ibt_mr_desc_t), KM_SLEEP);
-
rw_enter(&hca->state_lock, RW_READER);
if (hca->state != HCA_INITED) {
rw_exit(&hca->state_lock);
goto fail;
}
for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
mem_attr.mr_vaddr = (uintptr_t)buf;
--- 4555,4568 ----
bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
sizeof (ibt_mr_hdl_t), KM_SLEEP);
rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
sizeof (ibt_mr_desc_t), KM_SLEEP);
rw_enter(&hca->state_lock, RW_READER);
if (hca->state != HCA_INITED) {
rw_exit(&hca->state_lock);
+ cmn_err(CE_WARN, "hca->state != HCA_INITED");
goto fail;
}
for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
mem_attr.mr_vaddr = (uintptr_t)buf;
*** 3924,3934 ****
rw_exit(&hca->state_lock);
goto fail;
}
}
rw_exit(&hca->state_lock);
-
buf = (caddr_t)bp->buf;
for (i = 0; i < num; i++, buf += bp->rsize) {
bp->buflist[i] = (void *)buf;
}
bp->buffree = num - 1; /* no. of free buffers */
--- 4578,4587 ----
*** 4015,4025 ****
if (rbp->mr_hdl)
kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
if (rbp->mr_desc)
kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
-
if (bp->buf)
kmem_free(bp->buf, bp->bufsize);
mutex_destroy(&bp->buflock);
kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
kmem_free(rbp, sizeof (rib_bufpool_t));
--- 4668,4677 ----
*** 4057,4066 ****
--- 4709,4726 ----
return (RDMA_SUCCESS);
} else
return (RDMA_FAILED);
}
+ #if defined(MEASURE_POOL_DEPTH)
+ static void rib_recv_bufs(uint32_t x) {
+ return;
+ }
+ static void rib_send_bufs(uint32_t x) {
+ return;
+ }
+ #endif
/*
* Fetch a buffer of specified type.
* Note that rdbuf->handle is mw's rkey.
*/
*** 4107,4116 ****
--- 4767,4782 ----
for (i = bp->numelems - 1; i >= 0; i--) {
if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
rdbuf->handle.mrc_rmr = (uint32_t)rbp->mr_desc[i].md_rkey;
rdbuf->handle.mrc_linfo = (uintptr_t)rbp->mr_hdl[i];
rdbuf->handle.mrc_lmr = (uint32_t)rbp->mr_desc[i].md_lkey;
+ #if defined(MEASURE_POOL_DEPTH)
+ if (ptype == SEND_BUFFER)
+ rib_send_bufs(MAX_BUFS - (bp->buffree + 1));
+ if (ptype == RECV_BUFFER)
+ rib_recv_bufs(MAX_BUFS - (bp->buffree + 1));
+ #endif
bp->buffree--;
if (rib_debug > 1)
cmn_err(CE_NOTE, "rib_rbuf_alloc: %d free bufs "
"(type %d)\n", bp->buffree+1, ptype);
*** 4958,4967 ****
--- 5624,5636 ----
* conn_lists are NULL, so destroy
* buffers, close hca and be done.
*/
rib_rbufpool_destroy(hca, RECV_BUFFER);
rib_rbufpool_destroy(hca, SEND_BUFFER);
+ #ifdef SERVER_REG_CACHE
+ rib_destroy_cache(hca);
+ #endif
(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
(void) ibt_close_hca(hca->hca_hdl);
hca->hca_hdl = NULL;
}
rw_exit(&hca->cl_conn_list.conn_lock);
*** 4981,4985 ****
--- 5650,5981 ----
(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
(void) ibt_close_hca(hca->hca_hdl);
hca->hca_hdl = NULL;
}
}
+
+ #ifdef SERVER_REG_CACHE
+
+ static void
+ rib_server_side_cache_reclaim(void *argp)
+ {
+ cache_avl_struct_t *rcas;
+ rib_lrc_entry_t *rb;
+ rib_hca_t *hca = (rib_hca_t *)argp;
+
+ rw_enter(&hca->avl_rw_lock, RW_WRITER);
+ rcas = avl_first(&hca->avl_tree);
+ if (rcas != NULL)
+ avl_remove(&hca->avl_tree, rcas);
+ while (rcas != NULL) {
+ while (rcas->r.forw != &rcas->r) {
+ rcas->elements--;
+ rb = rcas->r.forw;
+ remque(rb);
+ (void) rib_deregistermem_via_hca(hca, rb->lrc_buf,
+ rb->lrc_mhandle);
+ kmem_free(rb->lrc_buf, rb->lrc_len);
+ kmem_free(rb, sizeof (rib_lrc_entry_t));
+ }
+ mutex_destroy(&rcas->node_lock);
+ kmem_cache_free(hca->server_side_cache, rcas);
+ rcas = avl_first(&hca->avl_tree);
+ if (rcas != NULL)
+ avl_remove(&hca->avl_tree, rcas);
+ }
+ rw_exit(&hca->avl_rw_lock);
+ }
+
+ static int
+ avl_compare(const void *t1, const void *t2)
+ {
+ const cache_avl_struct_t *a1 = t1;
+ const cache_avl_struct_t *a2 = t2;
+
+ if (rib_debug > 1)
+ cmn_err(CE_NOTE, "Comparing %d and %d\n", a1->len, a2->len);
+ if (a1->len < a2->len)
+ return (-1);
+ if (a1->len > a2->len)
+ return (1);
+ return (0);
+ }
+
+ static void
+ rib_destroy_cache(rib_hca_t *hca)
+ {
+ hca->avl_init = FALSE;
+ kmem_cache_destroy(hca->server_side_cache);
+ avl_destroy(&hca->avl_tree);
+ rw_destroy(&hca->avl_rw_lock);
+ }
+
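+ /*
+ * Fetch a long-reply buffer of length len from the registration
+ * cache, or allocate a fresh, unregistered one when the bucket is
+ * empty or the cache is not initialized.
+ */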
+ static rib_lrc_entry_t *
+ rib_get_server_cache_buf(CONN *conn, uint32_t len)
+ {
+ cache_avl_struct_t cas, *rcas;
+ rib_hca_t *hca = (ctoqp(conn))->hca;
+ rib_lrc_entry_t *reply_buf;
+ avl_index_t where = NULL;
+
+ if (!hca->avl_init)
+ goto error_alloc;
+ cas.len = len;
+ rw_enter(&hca->avl_rw_lock, RW_READER);
+ if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
+ &cas, &where)) == NULL) {
+ rw_exit(&hca->avl_rw_lock);
+ rw_enter(&hca->avl_rw_lock, RW_WRITER);
+ /* Recheck to make sure no other thread added the entry in */
+ if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
+ &cas, &where)) == NULL) {
+ /* Allocate an avl tree entry */
+ if (rib_debug > 1)
+ cmn_err(CE_NOTE,
+ "Allocating an avl entry for length %d\n", len);
+ rcas = (cache_avl_struct_t *)kmem_cache_alloc(
+ hca->server_side_cache, KM_SLEEP);
+ bzero(rcas, sizeof (cache_avl_struct_t));
+ rcas->elements = 0;
+ rcas->r.forw = &rcas->r;
+ rcas->r.back = &rcas->r;
+ rcas->len = len;
+ mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL);
+ avl_insert(&hca->avl_tree, rcas, where);
+ }
+ }
+ if (rcas->elements > 0) {
+ mutex_enter(&rcas->node_lock);
+ reply_buf = rcas->r.forw;
+ remque(reply_buf);
+ rcas->elements--;
+ mutex_exit(&rcas->node_lock);
+ rw_exit(&hca->avl_rw_lock);
+ if (rib_debug > 1)
+ cmn_err(CE_NOTE,
+ "Allocating a pre-alloced buffer for length %d\n", len);
+ } else {
+ rw_exit(&hca->avl_rw_lock);
+ rib_total_buffers++;
+ if (rib_debug > 1)
+ cmn_err(CE_NOTE, "Allocating a new buffer for length %d\n",
+ len);
+ /* Allocate a reply_buf entry */
+ reply_buf = (rib_lrc_entry_t *)kmem_alloc(
+ sizeof (rib_lrc_entry_t), KM_SLEEP);
+ bzero(reply_buf, sizeof (rib_lrc_entry_t));
+ reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
+ reply_buf->lrc_len = len;
+ reply_buf->registered = FALSE;
+ reply_buf->avl_node = (void *)rcas;
+ }
+
+ return (reply_buf);
+ error_alloc:
+ reply_buf = (rib_lrc_entry_t *)kmem_alloc(
+ sizeof (rib_lrc_entry_t), KM_SLEEP);
+ bzero(reply_buf, sizeof (rib_lrc_entry_t));
+ reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
+ reply_buf->lrc_len = len;
+ reply_buf->registered = FALSE;
+ reply_buf->avl_node = NULL;
+ return (reply_buf);
+ }
+
+ /*
+ * Return a pre-registered buffer back to the cache (without
+ * deregistering the buffer).
+ */
+
+ static void
+ rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
+ {
+ cache_avl_struct_t cas, *rcas;
+ avl_index_t where = NULL;
+ rib_hca_t *hca = (ctoqp(conn))->hca;
+
+ if (!reg_buf) {
+ cmn_err(CE_WARN, "Got a null reg_buf\n");
+ return;
+ }
+ if (!hca->avl_init)
+ goto error_free;
+ cas.len = reg_buf->lrc_len;
+ rw_enter(&hca->avl_rw_lock, RW_READER);
+ if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
+ &cas, &where)) == NULL) {
+ rw_exit(&hca->avl_rw_lock);
+ goto error_free;
+ } else {
+ mutex_enter(&rcas->node_lock);
+ insque(reg_buf, &rcas->r);
+ rcas->elements++;
+ mutex_exit(&rcas->node_lock);
+ rw_exit(&hca->avl_rw_lock);
+ if (rib_debug > 1)
+ cmn_err(CE_NOTE, "Returning buffer for length %d\n",
+ reg_buf->lrc_len);
+ }
+ return;
+ error_free:
+ (void) rib_deregistermem_via_hca(hca, reg_buf->lrc_buf,
+ reg_buf->lrc_mhandle);
+ kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
+ kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
+ }
+
+ #endif
+
+ static rdma_stat
+ rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
+ uint_t buflen, struct mrc *buf_handle)
+ {
+ ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */
+ #ifdef IB_FMR_SUP
+ ibt_pmr_desc_t pmr_desc; /* vaddr, lkey, rkey */
+ ibt_ma_hdl_t ma_hdl = NULL;
+ #endif
+ ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */
+ rdma_stat status;
+
+
+ /*
+ * Note: ALL buffer pools use the same memory type RDMARW.
+ */
+ /*
+ * This code will not be activated on the server. We could remove
+ * the call to rib_reg_mem_fmr, but leave it in, in case the FMR
+ * bugs get fixed. The bigger question is whether we need FMR when
+ * the registered buffers are coming out of a slab cache. This needs
+ * to be evaluated.
+ */
+ #ifdef IB_FMR_SUP
+ status = rib_reg_mem_fmr(hca, adsp, buf, buflen, 0, &mr_hdl, &ma_hdl,
+ &pmr_desc);
+ if (status == RDMA_SUCCESS) {
+ buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+ buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+ buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+ buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+ goto ret_stat;
+ } else {
+ buf_handle->mrc_linfo = NULL;
+ buf_handle->mrc_lma = NULL;
+ buf_handle->mrc_lmr = 0;
+ buf_handle->mrc_rmr = 0;
+ }
+ #endif
+ status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
+ if (status == RDMA_SUCCESS) {
+ buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+ buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
+ buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
+ } else {
+ buf_handle->mrc_linfo = NULL;
+ buf_handle->mrc_lmr = 0;
+ buf_handle->mrc_rmr = 0;
+ }
+ ret_stat:
+ return (status);
+ }
+
+ /* ARGSUSED */
+ static rdma_stat
+ rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf,
+ struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle)
+ {
+
+ (void) rib_deregistermem_via_hca(hca, buf, buf_handle);
+
+ return (RDMA_SUCCESS);
+ }
+
+ /* ARGSUSED */
+ static rdma_stat
+ rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
+ {
+ #ifdef IB_FMR_SUP
+ ibt_status_t ibt_status;
+ if (buf_handle.mrc_lma) {
+ ibt_status = ibt_unmap_mem_area(hca->hca_hdl,
+ (ibt_ma_hdl_t)buf_handle.mrc_lma);
+ if (ibt_status != IBT_SUCCESS) {
+ cmn_err(CE_WARN, "rib_deregistermem_via_hca: "
+ "ibt_unmap_mem_area failed: %d", ibt_status);
+ return (RDMA_FAILED);
+ }
+ ibt_status = ibt_deregister_fmr(hca->hca_hdl,
+ (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+ if (ibt_status != IBT_SUCCESS) {
+ cmn_err(CE_WARN, "rib_deregistermem_via_hca: "
+ "ibt_deregister_fmr failed: %d", ibt_status);
+ return (RDMA_FAILED);
+ }
+ return (RDMA_SUCCESS);
+ }
+ #endif
+
+ (void) ibt_deregister_mr(hca->hca_hdl,
+ (ibt_mr_hdl_t)buf_handle.mrc_linfo);
+ return (RDMA_SUCCESS);
+ }
+
+ #if defined(ASYNC_SERVER_DEREG)||defined(ASYNC_CLIENT_DEREG)
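+ /*
+ * Deregister one side of a clist chain: the source handles when src
+ * is TRUE, otherwise the destination handles.
+ */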
+ static int
+ clist_deregister1(CONN *conn, struct clist *cl, bool_t src)
+ {
+ struct clist *c;
+
+ for (c = cl; c; c = c->c_next) {
+ if (src) {
+ if (c->c_smemhandle.mrc_rmr != 0) {
+ (void) RDMA_DEREGMEMSYNC(conn,
+ (caddr_t)(uintptr_t)c->c_saddr,
+ c->c_smemhandle,
+ #ifdef SERVER_REG_CACHE
+ (void *)(uintptr_t)c->c_ssynchandle, (void *)c->long_reply_buf);
+ #else
+ (void *)(uintptr_t)c->c_ssynchandle);
+ #endif
+ c->c_smemhandle.mrc_rmr = 0;
+ c->c_ssynchandle = NULL;
+ }
+ } else {
+ if (c->c_dmemhandle.mrc_rmr != 0) {
+ (void) RDMA_DEREGMEMSYNC(conn,
+ (caddr_t)(uintptr_t)c->c_daddr,
+ c->c_dmemhandle,
+ #ifdef SERVER_REG_CACHE
+ (void *)(uintptr_t)c->c_dsynchandle, (void *)c->long_reply_buf);
+ #else
+ (void *)(uintptr_t)c->c_dsynchandle);
+ #endif
+ c->c_dmemhandle.mrc_rmr = 0;
+ c->c_dsynchandle = NULL;
+ }
+ }
+ }
+
+ return (RDMA_SUCCESS);
+ }
+ #endif
+
+
+
+ #if defined(ASYNC_CLIENT_DEREG)
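+ /*
+ * Worker thread that drains rqueue and performs client-side clist
+ * deregistration off the RPC reply path; insert_queue() enqueues
+ * copies of the CONN and clist and wakes the thread.
+ */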
+ static void
+ async_dereg_thread(caddr_t arg)
+ {
+ ASYNC *r;
+
+ cmn_err(CE_NOTE, "async_dereg_thread initiated\n");
+ for (;;) {
+ mutex_enter(&at_mutex);
+ while (rqueue.forw == &rqueue)
+ cv_wait(&at_cond, &at_mutex);
+ r = rqueue.forw;
+ remque(rqueue.forw);
+ mutex_exit(&at_mutex);
+ /* Process the deregistration. */
+ (void) clist_deregister1(&r->c_conn, &r->c_clist, FALSE);
+ kmem_free(r, sizeof (ASYNC));
+ }
+ }
+ static void
+ insert_queue(CONN *conn, struct clist *rwc)
+ {
+ ASYNC *r;
+
+ r = kmem_zalloc(sizeof (ASYNC), KM_SLEEP);
+ r->c_clist = *rwc;
+ r->c_conn = *conn;
+ mutex_enter(&at_mutex);
+ insque(r, &rqueue);
+ cv_broadcast(&at_cond);
+ mutex_exit(&at_mutex);
+ }
+ #endif