Udiff rpcib.c
--- /webrev/webrev/usr/src/uts/common/rpc/rpcib.c- Mon Aug 14 13:12:11 2006
+++ rpcib.c Thu Aug 10 14:05:27 2006
@@ -22,10 +22,24 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+
+/*
+ * Copyright (c) 2006, The Ohio State University. All rights reserved.
+ *
+ * Portions of this source code were developed by the team members of
+ * The Ohio State University's Network-Based Computing Laboratory (NBCL),
+ * headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * Acknowledgements for contributions from developers:
+ *	Ranjit Noronha:	noronha@cse.ohio-state.edu
+ *	Lei Chai:	chail@cse.ohio-state.edu
+ *	Weikuan Yu:	yuw@cse.ohio-state.edu
+ */
+
#pragma ident "@(#)rpcib.c 1.29 06/01/25 SMI"
/*
* The rpcib plugin. Implements the interface for RDMATF's
* interaction with IBTF.
@@ -55,10 +69,12 @@
#include <sys/isa_defs.h>
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
+/* #define IB_FMR_SUP */
+/* #define CLNT_POLL_CQ */
#include <sys/ib/ibtl/ibti.h>
#include <rpc/rpc.h>
#include <rpc/ib.h>
#include <sys/modctl.h>
@@ -68,11 +84,14 @@
#include <sys/sockio.h>
#include <sys/vnode.h>
#include <sys/tiuser.h>
#include <net/if.h>
#include <sys/cred.h>
+#include <rpc/rpc_rdma.h>
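+/*
+ * num_clients counts server-side QPs as they are created and torn down;
+ * is_server is set once the server resource-info path has run. Neither
+ * is updated under a dedicated lock, so the values are advisory only.
+ */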
+int num_clients = 0;
+volatile uint32_t is_server = 0;
extern char *inet_ntop(int, const void *, char *, int);
/*
@@ -105,10 +124,13 @@
CB_REV, /* rev */
nodev, /* int (*cb_aread)() */
nodev /* int (*cb_awrite)() */
};
/*
* Device options
*/
static struct dev_ops rpcib_ops = {
DEVO_REV, /* devo_rev, */
@@ -138,21 +160,41 @@
MODREV_1,
(void *)&rib_modldrv,
NULL
};
+#ifdef SERVER_REG_CACHE
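+/*
+ * Server-side registration cache: one AVL node exists per buffer length;
+ * r heads a circular list of free pre-registered buffers of that length,
+ * and node_lock protects the list and its element count.
+ */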
+typedef struct cache_struct {
+	avl_node_t		avl_link;
+	rib_lrc_entry_t		r;
+	uint32_t		len;
+	uint32_t		elements;
+	kmutex_t		node_lock;
+} cache_avl_struct_t;
+
+int rib_total_buffers = 0;
+#endif
/*
* rib_stat: private data pointer used when registering
* with the IBTF. It is returned to the consumer
* in all callbacks.
*/
static rpcib_state_t *rib_stat = NULL;
-#define RNR_RETRIES 2
+#define RNR_RETRIES IBT_RNR_INFINITE_RETRY
#define MAX_PORTS 2
-int preposted_rbufs = 16;
+#ifdef IB_FMR_SUP
+#define IB_FMR_DIRTY_MARK 32
+#define IB_FMR_MAX_SIZE 1048576
+/* #define IB_FMR_MAX_SIZE 32768 */
+#endif
+
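+/*
+ * preposted_rbufs is both the number of receive buffers pre-posted on a
+ * server QP and the initial credit grant advertised to the client
+ * (RDMA_BUFS_GRANT is expected to come from rpc/rpc_rdma.h).
+ */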
+int preposted_rbufs = RDMA_BUFS_GRANT;
int send_threshold = 1;
/*
* State of the plugin.
* ACCEPT = accepting new connections and requests.
@@ -167,22 +209,37 @@
/*
* RPCIB RDMATF operations
*/
+#if defined(MEASURE_POOL_DEPTH)
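+/*
+ * Empty hook, presumably a probe point (e.g. for dtrace fbt) used to
+ * observe how deep the pre-posted receive buffer pool gets.
+ */
+/* ARGSUSED */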
+static void rib_posted_rbufs(uint32_t x) { return;}
+#endif
static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
static rdma_stat rib_disconnect(CONN *conn);
static void rib_listen(struct rdma_svc_data *rd);
static void rib_listen_stop(struct rdma_svc_data *rd);
-static rdma_stat rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
+static rdma_stat rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
struct mrc *buf_handle);
static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
struct mrc buf_handle);
-static rdma_stat rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
+static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
+ caddr_t buf, uint_t buflen, struct mrc *buf_handle);
+static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
+ struct mrc buf_handle);
+#ifdef SERVER_REG_CACHE
+static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
+ struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc);
+static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
+ struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
+#else
+static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle);
static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle);
+
+#endif
static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
caddr_t buf, int len, int cpu);
static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
@@ -190,10 +247,20 @@
static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
+#if defined (CLNT_INTERRUPT_COAL)
+static void rib_scq_free(caddr_t);
+static rdma_stat rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid);
+#endif
+#if defined(ASYNC_SERVER_DEREG)
+static rdma_stat rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid,
+	caddr_t, caddr_t, int, caddr_t, int, int, int);
+#endif
+#if defined(ASYNC_CLIENT_DEREG)
+static void insert_queue(CONN *conn, struct clist *rwc);
+#endif
static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
@@ -200,10 +267,23 @@
static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **);
static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
static rdma_stat rib_conn_release(CONN *conn);
static rdma_stat rib_getinfo(rdma_info_t *info);
+#ifdef DYNAMIC_CREDIT_CONTROL
+void rib_get_resource_info(CONN *, int *, int *);
+#endif
+
+#ifdef SERVER_REG_CACHE
+static rib_lrc_entry_t *rib_get_server_cache_buf(CONN *conn, uint32_t len);
+static void rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
+static void rib_destroy_cache(rib_hca_t *hca);
+static void rib_server_side_cache_reclaim(void *argp);
+static int avl_compare(const void *t1, const void *t2);
+#endif
+
static rdma_stat rib_register_ats(rib_hca_t *);
static void rib_deregister_ats();
static void rib_stop_services(rib_hca_t *);
/*
@@ -213,12 +293,27 @@
int get_interfaces(TIUSER *tiptr, int *num);
int find_addrs(TIUSER *tiptr, char **addrs, int num_ifs);
int get_ibd_ipaddr(rpcib_ibd_insts_t *);
rpcib_ats_t *get_ibd_entry(ib_gid_t *, ib_pkey_t, rpcib_ibd_insts_t *);
void rib_get_ibd_insts(rpcib_ibd_insts_t *);
+#if defined(ASYNC_SERVER_DEREG)||defined(ASYNC_CLIENT_DEREG)
+static int clist_deregister1(CONN *, struct clist *, bool_t);
+#endif
-
+#if defined(ASYNC_CLIENT_DEREG)
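+/*
+ * Client-side asynchronous deregistration: insert_queue() copies the
+ * CONN and the clist into an ASYNC record and appends it to rqueue;
+ * async_dereg_thread() drains the queue and performs the actual
+ * deregistration, off the caller's reply path.
+ */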
+typedef struct async_dereg {
+ struct async_dereg *forw;
+ struct async_dereg *back;
+ CONN c_conn;
+ struct clist c_clist;
+} ASYNC;
+static void async_dereg_thread(caddr_t arg);
+extern pri_t minclsyspri; /* priority for taskq */
+static ASYNC rqueue;
+static kmutex_t at_mutex;
+static kcondvar_t at_cond;
+#endif
/*
* RDMA operations the RPCIB module exports
*/
static rdmaops_t rib_ops = {
rib_reachable,
@@ -232,17 +327,33 @@
rib_deregistermemsync,
rib_syncmem,
rib_reg_buf_alloc,
rib_reg_buf_free,
rib_send,
+#if defined (CLNT_INTERRUPT_COAL)
+ rib_send_bl,
+#endif
+#if defined(ASYNC_SERVER_DEREG)
+ rib_send_nw,
+#endif
rib_send_resp,
rib_post_resp,
rib_post_recv,
rib_recv,
rib_read,
rib_write,
- rib_getinfo
+ rib_getinfo,
+#ifdef SERVER_REG_CACHE
+ rib_get_server_cache_buf,
+ rib_free_server_cache_buf,
+#endif
+#ifdef DYNAMIC_CREDIT_CONTROL
+ rib_get_resource_info,
+#endif
+#if defined(ASYNC_CLIENT_DEREG)
+ insert_queue,
+#endif
};
/*
* RDMATF RPCIB plugin details
*/
@@ -258,12 +369,18 @@
static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
-static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
+#ifdef IB_FMR_SUP
+static rdma_stat rib_reg_mem_fmr(rib_hca_t *, caddr_t adsp,caddr_t, uint_t, ibt_mr_flags_t,
+ ibt_mr_hdl_t *, ibt_ma_hdl_t *, ibt_pmr_desc_t *);
+#endif
+static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t, ibt_mr_flags_t,
ibt_mr_hdl_t *, ibt_mr_desc_t *);
+static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
+ ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *);
static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
rib_qp_t **);
static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
rib_qp_t **);
@@ -314,12 +431,16 @@
* debugging in rpcib kernel module.
* Set it to values greater that 1 to control
* the amount of debugging messages required.
*/
int rib_debug = 0;
-
+#if defined(CLNT_POLL_CQ)
+int max_poll_count = 500;
+#endif
static int ats_running = 0;
int
_init(void)
{
int error;
@@ -571,10 +692,11 @@
static rdma_stat rib_rem_replylist(rib_qp_t *);
static int rib_remreply(rib_qp_t *, struct reply *);
static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
/*
* One CQ pair per HCA
*/
static rdma_stat
rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
@@ -631,11 +753,14 @@
rdma_stat status;
ibt_hca_portinfo_t *pinfop;
ibt_pd_flags_t pd_flags = IBT_PD_NO_FLAGS;
uint_t size, cq_size;
int i;
-
+#ifdef IB_FMR_SUP
+ ibt_fmr_pool_attr_t fmr_attr;
+ uint_t h_page_sz;
+#endif
ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
if (ribstat->hcas == NULL)
ribstat->hcas = kmem_zalloc(ribstat->hca_count *
sizeof (rib_hca_t), KM_SLEEP);
@@ -744,11 +869,75 @@
if (hca->send_pool == NULL) {
cmn_err(CE_WARN, "open_hcas: send buf pool failed\n");
rib_rbufpool_destroy(hca, RECV_BUFFER);
goto fail3;
}
+#ifdef IB_FMR_SUP
+ /* Global FMR POOL */
+ bzero(&fmr_attr, sizeof (ibt_fmr_pool_attr_t));
+ h_page_sz = hca->hca_attrs.hca_page_sz * 1024;
+
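+	/*
+	 * A buffer of IB_FMR_MAX_SIZE bytes covers at most
+	 * IB_FMR_MAX_SIZE / h_page_sz whole pages, plus a partial page at
+	 * each end; hence the "+ 2" below. (hca_page_sz is in KB.)
+	 */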
+ fmr_attr.fmr_max_pages_per_fmr =
+ (IB_FMR_MAX_SIZE / h_page_sz) + 2;
+ fmr_attr.fmr_pool_size = MAX_BUFS * 2;
+ fmr_attr.fmr_dirty_watermark = IB_FMR_DIRTY_MARK;
+ fmr_attr.fmr_page_sz = h_page_sz;
+ fmr_attr.fmr_cache = B_FALSE;
+ fmr_attr.fmr_flags = IBT_MR_SLEEP |
+ IBT_MR_ENABLE_LOCAL_WRITE |
+ IBT_MR_ENABLE_REMOTE_READ |
+ IBT_MR_ENABLE_REMOTE_WRITE;
+ fmr_attr.fmr_func_hdlr = NULL;
+
+ if (rib_debug > 1) {
+ cmn_err(CE_NOTE, "open_hcas: ibt_create_fmr_pool:");
+ cmn_err(CE_NOTE, "fmr_page_sz %d, fmr_pool_sz %d, "
+ "max_pages_per_fmr %d", fmr_attr.fmr_page_sz,
+ fmr_attr.fmr_pool_size,
+ fmr_attr.fmr_max_pages_per_fmr);
+ }
+
+ ibt_status = ibt_create_fmr_pool(hca->hca_hdl, hca->pd_hdl,
+ &fmr_attr, &hca->fmr_pool);
+ if (ibt_status != IBT_SUCCESS) {
+ cmn_err(CE_WARN, "open_hcas: Global FMR pool creation "
+ "failed: %d\n", ibt_status);
+ rib_rbufpool_destroy(hca, RECV_BUFFER);
+ rib_rbufpool_destroy(hca, SEND_BUFFER);
+ goto fail3;
+ }
+#endif
+#ifdef SERVER_REG_CACHE
+	if (rib_debug)
+		cmn_err(CE_NOTE, "open_hcas: registration cache enabled\n");
+	{
+	cache_avl_struct_t my_avl_node;
+
+	hca->server_side_cache = kmem_cache_create("rib_server_side_cache",
+	    sizeof (cache_avl_struct_t), 0,
+	    NULL,
+	    NULL,
+	    rib_server_side_cache_reclaim,
+	    hca, NULL, 0);
+	avl_create(&hca->avl_tree,
+	    avl_compare,
+	    sizeof (cache_avl_struct_t),
+	    (uint_t)(uintptr_t)&my_avl_node.avl_link -
+	    (uint_t)(uintptr_t)&my_avl_node);
+	rw_init(&hca->avl_rw_lock, NULL, RW_DRIVER, hca->iblock);
+	hca->avl_init = TRUE;
+	}
+#endif
+
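+	/*
+	 * Start the asynchronous deregistration worker. Note that this
+	 * runs from open_hcas(), so the queue and its locks are (re)set
+	 * up per HCA open; a single HCA is assumed here.
+	 */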
+#if defined(ASYNC_CLIENT_DEREG)
+ rqueue.forw = rqueue.back = &rqueue;
+ mutex_init(&at_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&at_cond, NULL, CV_DEFAULT, NULL);
+ (void) thread_create(NULL, 0, async_dereg_thread, NULL, 0, &p0,
+ TS_RUN, minclsyspri);
+#endif
/*
* Initialize the registered service list and
* the lock
*/
hca->service_list = NULL;
@@ -886,10 +1075,88 @@
}
}
}
}
+#if defined (CLNT_INTERRUPT_COAL)
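+/*
+ * Interrupt coalescing support: client sends are normally posted
+ * unsignaled and their send_wids parked on the QP (see rib_send_bl());
+ * rib_scq_free() later performs the completion processing and buffer
+ * cleanup for each parked send_wid, as if it had completed successfully.
+ */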
+static void
+rib_scq_free(caddr_t widd)
+{
+	struct send_wid *wd = (struct send_wid *)widd;
+	ibt_wc_t wc;
+	int i;
+	CONN *conn = qptoc(wd->qp);
+
+	wc.wc_status = IBT_WC_SUCCESS;
+	mutex_enter(&wd->sendwait_lock);
+	switch (wc.wc_status) {
+ case IBT_WC_SUCCESS:
+ wd->status = RDMA_SUCCESS;
+ break;
+ case IBT_WC_WR_FLUSHED_ERR:
+ wd->status = RDMA_FAILED;
+ break;
+ default:
+/*
+ * RC Send Q Error Code Local state Remote State
+ * ==================== =========== ============
+ * IBT_WC_BAD_RESPONSE_ERR ERROR None
+ * IBT_WC_LOCAL_LEN_ERR ERROR None
+ * IBT_WC_LOCAL_CHAN_OP_ERR ERROR None
+ * IBT_WC_LOCAL_PROTECT_ERR ERROR None
+ * IBT_WC_MEM_WIN_BIND_ERR ERROR None
+ * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR
+ * IBT_WC_REMOTE_ACCESS_ERR ERROR ERROR
+ * IBT_WC_REMOTE_OP_ERR ERROR ERROR
+ * IBT_WC_RNR_NAK_TIMEOUT_ERR ERROR None
+ * IBT_WC_TRANS_TIMEOUT_ERR ERROR None
+ * IBT_WC_WR_FLUSHED_ERR None None
+ */
+#ifdef DEBUG
+ if (rib_debug > 1) {
+ if (wc.wc_status != IBT_WC_SUCCESS) {
+ cmn_err(CE_NOTE, "rib_clnt_scq_handler: "
+ "WR completed in error, wc.wc_status:%d, "
+ "wc_id:%llx\n", wc.wc_status, (longlong_t)wc.wc_id);
+ }
+ }
+#endif
+ /*
+ * Channel in error state. Set connection to
+ * ERROR and cleanup will happen either from
+ * conn_release or from rib_conn_get
+ */
+ wd->status = RDMA_FAILED;
+ mutex_enter(&conn->c_lock);
+ if (conn->c_state != C_DISCONN_PEND)
+ conn->c_state = C_ERROR;
+ mutex_exit(&conn->c_lock);
+ break;
+ }
+ if (wd->cv_sig == 1) {
+		/*
+		 * Notify poster
+		 */
+		cv_signal(&wd->wait_cv);
+ mutex_exit(&wd->sendwait_lock);
+ } else {
+ /*
+ * Poster not waiting for notification.
+ * Free the send buffers and send_wid
+ */
+ for (i = 0; i < wd->nsbufs; i++) {
+ rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
+ (void *)(uintptr_t)wd->sbufaddr[i]);
+ }
+ mutex_exit(&wd->sendwait_lock);
+ (void) rib_free_sendwait(wd);
+ }
+}
+#endif
+
/* ARGSUSED */
static void
rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
{
ibt_status_t ibt_status;
@@ -919,11 +1186,32 @@
wc.wc_status, (longlong_t)wc.wc_id);
}
#endif
if (wc.wc_id != NULL) { /* XXX NULL possible ???? */
struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id;
-
+#ifdef ASYNC_SERVER_DEREG
+		if (wd->c1) {
+			(void) clist_deregister1((CONN *)wd->c,
+			    (struct clist *)wd->c1, TRUE);
+#ifdef SERVER_REG_CACHE
+			RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c,
+			    (rib_lrc_entry_t *)
+			    ((struct clist *)wd->c1)->long_reply_buf);
+#else
+			if (wd->l1)
+				kmem_free((void *)((struct clist *)
+				    wd->c1)->c_saddr, wd->l1);
+#endif
+			kmem_free((void *)wd->c1,
+			    wd->wl * sizeof (struct clist));
+		}
+		if (wd->c2) {
+			(void) clist_deregister1((CONN *)wd->c,
+			    (struct clist *)wd->c2, TRUE);
+#ifdef SERVER_REG_CACHE
+			RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c,
+			    (rib_lrc_entry_t *)
+			    ((struct clist *)wd->c2)->long_reply_buf);
+#else
+			if (wd->l2)
+				kmem_free((void *)((struct clist *)
+				    wd->c2)->c_saddr, wd->l2);
+#endif
+			kmem_free((void *)wd->c2,
+			    wd->rl * sizeof (struct clist));
+		}
+#endif
mutex_enter(&wd->sendwait_lock);
if (wd->cv_sig == 1) {
/*
* Update completion status and notify poster
*/
@@ -958,31 +1246,57 @@
{
rib_qp_t *qp;
ibt_status_t ibt_status;
ibt_wc_t wc;
struct recv_wid *rwid;
+#if defined(CLNT_POLL_CQ)
+ uint32_t count = 0;
+#endif
/*
* Re-enable cq notify here to avoid missing any
* completion queue notification.
*/
+#if !defined(CLNT_POLL_CQ)
(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
+#endif
ibt_status = IBT_SUCCESS;
while (ibt_status != IBT_CQ_EMPTY) {
+#if defined(CLNT_POLL_CQ)
+poll_cq_again:
+#endif
bzero(&wc, sizeof (wc));
ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
+#if defined(CLNT_POLL_CQ)
+		if (ibt_status == IBT_CQ_EMPTY) {
+			count++;
+			if (count == max_poll_count) {
+				(void) ibt_enable_cq_notify(cq_hdl,
+				    IBT_NEXT_COMPLETION);
+				return;
+			}
+			goto poll_cq_again;
+		}
+#endif
if (ibt_status != IBT_SUCCESS)
+#if defined(CLNT_POLL_CQ)
+		{
+			(void) ibt_enable_cq_notify(cq_hdl,
+			    IBT_NEXT_COMPLETION);
+#endif
return;
-
+#if defined(CLNT_POLL_CQ)
+ }
+ count = 0;
+#endif
rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
qp = rwid->qp;
if (wc.wc_status == IBT_WC_SUCCESS) {
XDR inxdrs, *xdrs;
uint_t xid, vers, op, find_xid = 0;
struct reply *r;
CONN *conn = qptoc(qp);
+ uint32_t rdma_credit = 0;
xdrs = &inxdrs;
xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
wc.wc_bytes_xfer, XDR_DECODE);
/*
@@ -991,10 +1305,11 @@
*/
xid = *(uint32_t *)(uintptr_t)rwid->addr;
/* Skip xid and set the xdr position accordingly. */
XDR_SETPOS(xdrs, sizeof (uint32_t));
(void) xdr_u_int(xdrs, &vers);
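+			/*
+			 * New with credit-based flow control: the RPC/RDMA
+			 * header carries a credit value between the version
+			 * and op fields.
+			 */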
+ (void) xdr_u_int(xdrs, &rdma_credit);
(void) xdr_u_int(xdrs, &op);
XDR_DESTROY(xdrs);
if (vers != RPCRDMA_VERS) {
/*
* Invalid RPC/RDMA version. Cannot interoperate.
@@ -1108,17 +1423,21 @@
s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
qp = s_recvp->qp;
conn = qptoc(qp);
mutex_enter(&qp->posted_rbufs_lock);
qp->n_posted_rbufs--;
+#if defined(MEASURE_POOL_DEPTH)
+ rib_posted_rbufs(preposted_rbufs - qp->n_posted_rbufs);
+#endif
if (qp->n_posted_rbufs == 0)
cv_signal(&qp->posted_rbufs_cv);
mutex_exit(&qp->posted_rbufs_lock);
if (wc.wc_status == IBT_WC_SUCCESS) {
XDR inxdrs, *xdrs;
uint_t xid, vers, op;
+ uint32_t rdma_credit;
xdrs = &inxdrs;
/* s_recvp->vaddr stores data */
xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
wc.wc_bytes_xfer, XDR_DECODE);
@@ -1129,10 +1448,11 @@
*/
xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
/* Skip xid and set the xdr position accordingly. */
XDR_SETPOS(xdrs, sizeof (uint32_t));
if (!xdr_u_int(xdrs, &vers) ||
+ !xdr_u_int(xdrs, &rdma_credit) ||
!xdr_u_int(xdrs, &op)) {
rib_rbuf_free(conn, RECV_BUFFER,
(void *)(uintptr_t)s_recvp->vaddr);
XDR_DESTROY(xdrs);
#ifdef DEBUG
@@ -1338,10 +1658,11 @@
static rdma_stat
rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
{
rib_qp_t *kqp = NULL;
CONN *conn;
+ rdma_clnt_cred_ctrl_t *cc_info;
ASSERT(qp != NULL);
*qp = NULL;
kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
@@ -1365,10 +1686,25 @@
mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
+#if defined (CLNT_INTERRUPT_COAL)
+ kqp->rdmaconn.c_count = 0;
+ conn->c_count = 0;
+ bzero(&kqp->wd, sizeof(struct send_wid));
+ kqp->wd.forw = kqp->wd.back = &kqp->wd;
+#endif
+ /*
+ * Initialize the client credit control
+ * portion of the rdmaconn struct.
+ */
+ kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT;
+ cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
+ cc_info->clnt_cc_granted_ops = 0;
+ cc_info->clnt_cc_in_flight_ops = 0;
+ cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL);
*qp = kqp;
return (RDMA_SUCCESS);
}
@@ -1378,10 +1714,11 @@
{
rib_qp_t *kqp = NULL;
ibt_chan_sizes_t chan_sizes;
ibt_rc_chan_alloc_args_t qp_attr;
ibt_status_t ibt_status;
+ rdma_srv_cred_ctrl_t *cc_info;
ASSERT(qp != NULL);
*qp = NULL;
kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
@@ -1439,11 +1776,24 @@
/*
* Set the private data area to qp to be used in callbacks
*/
ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
kqp->rdmaconn.c_state = C_CONNECTED;
+
+ /*
+ * Initialize the server credit control
+ * portion of the rdmaconn struct.
+ */
+ kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
+ cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
+ cc_info->srv_cc_buffers_granted = preposted_rbufs;
+ cc_info->srv_cc_cur_buffers_used = 0;
+ cc_info->srv_cc_posted = preposted_rbufs;
+
*qp = kqp;
+
+ num_clients++;
return (RDMA_SUCCESS);
fail:
if (kqp)
kmem_free(kqp, sizeof (rib_qp_t));
@@ -1722,12 +2072,12 @@
qp_attr.rc_flags = IBT_WR_SIGNALED;
chan_args.oc_path = path;
chan_args.oc_cm_handler = rib_clnt_cm_handler;
chan_args.oc_cm_clnt_private = (void *)rib_stat;
- chan_args.oc_rdma_ra_out = 1;
- chan_args.oc_rdma_ra_in = 1;
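+	/*
+	 * Allow up to 4 outstanding RDMA reads per channel (previously 1).
+	 * The server side advertises the same values in its connection
+	 * reply in rib_srv_cm_handler().
+	 */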
+ chan_args.oc_rdma_ra_out = 4;
+ chan_args.oc_rdma_ra_in = 4;
chan_args.oc_path_retry_cnt = 2;
chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
refresh:
rw_enter(&hca->state_lock, RW_READER);
@@ -1900,10 +2250,20 @@
kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
}
if (conn->c_laddr.buf != NULL) {
kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
}
+
+ /*
+ * Credit control cleanup.
+ */
+ if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
+ rdma_clnt_cred_ctrl_t *cc_info;
+ cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
+ cv_destroy(&cc_info->clnt_cc_cv);
+ }
+
kmem_free(qp, sizeof (rib_qp_t));
/*
* If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
* then the hca is no longer being used.
@@ -1925,13 +2285,37 @@
}
rw_exit(&hca->srv_conn_list.conn_lock);
}
rw_exit(&hca->state_lock);
}
+
+ num_clients--;
return (RDMA_SUCCESS);
}
+#ifdef DYNAMIC_CREDIT_CONTROL
+void
+rib_get_resource_info(CONN *conn, int *current_clients, int *avail_bufs)
+{
+	rib_qp_t *qp = ctoqp(conn);
+	rib_hca_t *hca = qp->hca;
+	rib_bufpool_t *rbp = NULL;
+	bufpool_t *bp;
+
+	is_server = 1;
+	rbp = hca->recv_pool;
+
+	if (rbp == NULL) {
+		*avail_bufs = 0;
+	} else {
+		bp = rbp->bpool;
+		*avail_bufs = bp->buffree;
+	}
+
+	*current_clients = num_clients;
+}
+#endif
+
/*
* Wait for send completion notification. Only on receiving a
* notification be it a successful or error completion, free the
* send_wid.
*/
@@ -2062,12 +2446,17 @@
* Send buffers are freed here only in case of error in posting
* on QP. If the post succeeded, the send buffers are freed upon
* send completion in rib_sendwait() or in the scq_handler.
*/
rdma_stat
+#if defined(ASYNC_SERVER_DEREG)
rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
- int send_sig, int cv_sig)
+    int send_sig, int cv_sig, caddr_t c, caddr_t c1, int l1, caddr_t c2,
+    int l2, int l3, int l4, caddr_t *swid)
+#else
+rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
+ int send_sig, int cv_sig, caddr_t *swid)
+#endif
{
struct send_wid *wdesc;
struct clist *clp;
ibt_status_t ibt_status = IBT_SUCCESS;
rdma_stat ret = RDMA_SUCCESS;
@@ -2100,15 +2489,26 @@
if (send_sig) {
/* Set SEND_SIGNAL flag. */
tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
wdesc = rib_init_sendwait(msgid, cv_sig, qp);
+ *swid = (caddr_t)wdesc;
} else {
tx_wr.wr_flags = IBT_WR_NO_FLAGS;
wdesc = rib_init_sendwait(msgid, 0, qp);
+ *swid = (caddr_t)wdesc;
}
wdesc->nsbufs = nds;
+#if defined(ASYNC_SERVER_DEREG)
+ wdesc->c = c;
+ wdesc->c1 = c1;
+ wdesc->c2 = c2;
+ wdesc->l1 = l1;
+ wdesc->l2 = l2;
+ wdesc->wl = l3;
+ wdesc->rl = l4;
+#endif
for (i = 0; i < nds; i++) {
wdesc->sbufaddr[i] = sgl[i].ds_va;
}
tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
@@ -2161,21 +2561,72 @@
}
return (RDMA_SUCCESS);
}
+#if defined (CLNT_INTERRUPT_COAL)
rdma_stat
+rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid)
+{
+	rdma_stat ret;
+	struct send_wid *sd, dlist;
+	rib_qp_t *qp = ctoqp(conn);
+	caddr_t wd;
+
+	mutex_enter(&conn->c_lock);
+	if ((conn->c_count + 1) >= (preposted_rbufs / 2)) {
+		conn->c_count = 0;
+		dlist.forw = dlist.back = &dlist;
+		while (qp->wd.forw != &qp->wd) {
+			sd = qp->wd.forw;
+			remque(sd);
+			insque(sd, &dlist);
+		}
+		mutex_exit(&conn->c_lock);
+		ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
+		while (dlist.forw != &dlist) {
+			sd = dlist.forw;
+			remque(dlist.forw);
+			rib_scq_free((caddr_t)sd);
+		}
+	} else {
+		mutex_exit(&conn->c_lock);
+		wd = 0;
+		ret = rib_send_and_wait(conn, cl, msgid, 0, 0, &wd);
+		mutex_enter(&conn->c_lock);
+		conn->c_count++;
+		insque(wd, &qp->wd);
+		mutex_exit(&conn->c_lock);
+	}
+	return (ret);
+}
+#endif
+
+rdma_stat
rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
{
rdma_stat ret;
+	caddr_t wd;
+
+	/* send-wait & cv_signal */
+#if defined(ASYNC_SERVER_DEREG)
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+	    &wd);
+#else
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
+#endif
+ return (ret);
+}
+#if defined(ASYNC_SERVER_DEREG)
+rdma_stat
+rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid, caddr_t c,
+    caddr_t c1, int c2, caddr_t c3, int c4, int c5, int c6)
+{
+	rdma_stat ret;
+	caddr_t wid;
/* send-wait & cv_signal */
- ret = rib_send_and_wait(conn, cl, msgid, 1, 1);
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, c, c1, c2, c3, c4, c5,
+	    c6, &wid);
return (ret);
}
-
+#endif
/*
* Server interface (svc_rdma_ksend).
* Send RPC reply and wait for RDMA_DONE.
*/
rdma_stat
@@ -2182,17 +2633,22 @@
rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
{
rdma_stat ret = RDMA_SUCCESS;
struct rdma_done_list *rd;
clock_t timout, cv_wait_ret;
+	caddr_t wid;
rib_qp_t *qp = ctoqp(conn);
mutex_enter(&qp->rdlist_lock);
rd = rdma_done_add(qp, msgid);
/* No cv_signal (whether send-wait or no-send-wait) */
- ret = rib_send_and_wait(conn, cl, msgid, 1, 0);
+#if defined(ASYNC_SERVER_DEREG)
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+	    &wid);
+#else
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, &wid);
+#endif
if (ret != RDMA_SUCCESS) {
#ifdef DEBUG
cmn_err(CE_WARN, "rib_send_resp: send_and_wait "
"failed, msgid %u, qp %p", msgid, (void *)qp);
#endif
@@ -2496,11 +2952,10 @@
*/
rdma_stat
rib_write(CONN *conn, struct clist *cl, int wait)
{
ibt_send_wr_t tx_wr;
- int nds;
int cv_sig;
ibt_wr_ds_t sgl[DSEG_MAX];
struct send_wid *wdesc;
ibt_status_t ibt_status;
rdma_stat ret = RDMA_SUCCESS;
@@ -2509,30 +2964,20 @@
if (cl == NULL) {
cmn_err(CE_WARN, "rib_write: NULL clist\n");
return (RDMA_FAILED);
}
+
+	while (cl != NULL) {
+	    if (cl->c_len > 0) {
bzero(&tx_wr, sizeof (ibt_send_wr_t));
- /*
- * Remote address is at the head chunk item in list.
- */
tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_daddr;
tx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_dmemhandle.mrc_rmr; /* rkey */
+ sgl[0].ds_va = cl->c_saddr;
+ sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
+ sgl[0].ds_len = cl->c_len;
- nds = 0;
- while (cl != NULL) {
- if (nds >= DSEG_MAX) {
- cmn_err(CE_WARN, "rib_write: DSEG_MAX too small!");
- return (RDMA_FAILED);
- }
- sgl[nds].ds_va = cl->c_saddr;
- sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
- sgl[nds].ds_len = cl->c_len;
- cl = cl->c_next;
- nds++;
- }
-
if (wait) {
tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
cv_sig = 1;
} else {
tx_wr.wr_flags = IBT_WR_NO_FLAGS;
@@ -2541,11 +2986,11 @@
wdesc = rib_init_sendwait(0, cv_sig, qp);
tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
tx_wr.wr_opcode = IBT_WRC_RDMAW;
tx_wr.wr_trans = IBT_RC_SRV;
- tx_wr.wr_nds = nds;
+ tx_wr.wr_nds = 1;
tx_wr.wr_sgl = sgl;
mutex_enter(&conn->c_lock);
if (conn->c_state & C_CONNECTED) {
ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
@@ -2565,10 +3010,13 @@
ret = rib_sendwait(qp, wdesc);
if (ret != 0) {
return (ret);
}
}
+ }
+ cl = cl->c_next;
+ }
return (RDMA_SUCCESS);
}
/*
* RDMA Read a buffer from the remote address.
@@ -2683,11 +3131,11 @@
rpcib_state_t *ribstat;
rib_hca_t *hca;
rdma_stat status = RDMA_SUCCESS;
int i;
struct clist cl;
- rdma_buf_t rdbuf;
+ rdma_buf_t rdbuf = {0};
void *buf = NULL;
ibt_cm_req_rcv_t cm_req_rcv;
CONN *conn;
ibt_status_t ibt_status;
ibt_ar_t ar_query, ar_result;
@@ -2768,12 +3216,12 @@
}
}
#endif
ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
- ret_args->cm_ret.rep.cm_rdma_ra_out = 1;
- ret_args->cm_ret.rep.cm_rdma_ra_in = 1;
+ ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
+ ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
/*
* Pre-posts RECV buffers
*/
@@ -3693,44 +4141,149 @@
return (0);
}
rdma_stat
-rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
+rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
struct mrc *buf_handle)
{
ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */
+#ifdef IB_FMR_SUP
+ ibt_pmr_desc_t pmr_desc; /* vaddr, lkey, rkey */
+ ibt_ma_hdl_t ma_hdl = NULL;
+#endif
ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */
rdma_stat status;
rib_hca_t *hca = (ctoqp(conn))->hca;
/*
* Note: ALL buffer pools use the same memory type RDMARW.
*/
- status = rib_reg_mem(hca, buf, buflen, 0, &mr_hdl, &mr_desc);
+#ifdef IB_FMR_SUP
+ status = rib_reg_mem_fmr(hca, adsp, buf, buflen, 0, &mr_hdl, &ma_hdl,
+ &pmr_desc);
if (status == RDMA_SUCCESS) {
buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+ buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+ buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+ buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+ goto ret_stat;
+ } else {
+ buf_handle->mrc_linfo = NULL;
+ buf_handle->mrc_lma = NULL;
+ buf_handle->mrc_lmr = 0;
+ buf_handle->mrc_rmr = 0;
+ }
+#endif
+ status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
+ if (status == RDMA_SUCCESS) {
+ buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
} else {
buf_handle->mrc_linfo = NULL;
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
+ret_stat:
return (status);
}
+#ifdef IB_FMR_SUP
static rdma_stat
-rib_reg_mem(rib_hca_t *hca, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
+rib_reg_mem_fmr(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size,
+    ibt_mr_flags_t spec, ibt_mr_hdl_t *mr_hdlp, ibt_ma_hdl_t *ma_hdlp,
+    ibt_pmr_desc_t *pmr_descp)
+{
+ ibt_va_attr_t va_attr;
+ ibt_phys_buf_t *paddr_list;
+ uint_t paddr_list_len, num_paddr;
+ size_t buf_sz = 0;
+ ibt_pmr_attr_t pmr_attr;
+ ib_memlen_t paddr_offset;
+ ibt_status_t ibt_status;
+ uint_t h_page_sz;
+	/*
+	 * The FMR path does not handle a caller-supplied address space;
+	 * fail so the caller falls back to rib_reg_mem().
+	 */
+	if (adsp != NULL)
+		return (RDMA_FAILED);
+
+	bzero(&va_attr, sizeof (ibt_va_attr_t));
+	va_attr.va_vaddr = (ib_vaddr_t)(uintptr_t)buf;
+	va_attr.va_len = size;
+	va_attr.va_as = (struct as *)adsp;
+	va_attr.va_flags = IBT_VA_FMR | IBT_VA_SLEEP;
+	if (spec == IBT_MR_NONCOHERENT)
+		va_attr.va_flags |= IBT_VA_NONCOHERENT;
+	va_attr.va_phys_buf_min = va_attr.va_phys_buf_max = 0;
+
+	h_page_sz = hca->hca_attrs.hca_page_sz * 1024;
+	paddr_list_len = (size / h_page_sz) + 2;
+	paddr_list = kmem_zalloc(sizeof (ibt_phys_buf_t) * paddr_list_len,
+	    KM_NOSLEEP);
+	if (paddr_list == NULL)
+		return (RDMA_FAILED);
+
+	if (rib_debug > 0) {
+		cmn_err(CE_NOTE, "fmr: vaddr %p, size %d paddr_list_len %d\n",
+		    (void *)buf, size, paddr_list_len);
+	}
+
+ ibt_status = ibt_map_mem_area(hca->hca_hdl, &va_attr, paddr_list_len,
+ paddr_list, &num_paddr, &buf_sz, &paddr_offset, ma_hdlp);
+ if (ibt_status != IBT_SUCCESS) {
+ cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_map_mem_area failed: "
+ "status %d", ibt_status);
+ kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
+ return (RDMA_FAILED);
+ }
+
+	if (rib_debug > 0) {
+		cmn_err(CE_NOTE, "fmr: p_laddr %p, p_size %d, buf_sz %d, "
+		    "p_offset %llX\n", paddr_list[0].p_laddr,
+		    paddr_list[0].p_size, buf_sz, paddr_offset);
+		cmn_err(CE_NOTE, "fmr: ibt_map_mem_area: ret %d, "
+		    "num_paddr %d, spec %d\n", ibt_status, num_paddr, spec);
+	}
+
+ bzero(&pmr_attr, sizeof (ibt_pmr_attr_t));
+ pmr_attr.pmr_iova = (ib_vaddr_t)buf;
+ pmr_attr.pmr_len = size;
+ pmr_attr.pmr_num_buf = num_paddr;
+ pmr_attr.pmr_buf_sz = buf_sz;
+ pmr_attr.pmr_buf_list = paddr_list;
+ pmr_attr.pmr_offset = paddr_offset;
+ pmr_attr.pmr_flags = spec;
+ pmr_attr.pmr_ma = *ma_hdlp;
+
+ ibt_status = ibt_register_physical_fmr(hca->hca_hdl, hca->fmr_pool,
+ &pmr_attr, mr_hdlp, pmr_descp);
+ if (ibt_status != IBT_SUCCESS) {
+ cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_register_physical_fmr "
+ "failed: status %d", ibt_status);
+ (void) ibt_unmap_mem_area(hca->hca_hdl, *ma_hdlp);
+		*ma_hdlp = NULL;
+ kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
+ return (RDMA_FAILED);
+ }
+
+ if (rib_debug > 0) {
+ cmn_err(CE_NOTE,"fmr: rkey: 0x%lX lkey: 0x%lX, iova: %p, fmr_hdl %p \n",
+ pmr_descp->pmd_rkey, pmr_descp->pmd_lkey,
+ pmr_descp->pmd_iova, *mr_hdlp);
+ }
+
+ kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
+
+ return (RDMA_SUCCESS);
+}
+
+#endif
+static rdma_stat
+rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
{
ibt_mr_attr_t mem_attr;
ibt_status_t ibt_status;
-
mem_attr.mr_vaddr = (uintptr_t)buf;
mem_attr.mr_len = (ib_msglen_t)size;
- mem_attr.mr_as = NULL;
+	mem_attr.mr_as = (struct as *)adsp;
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
IBT_MR_ENABLE_WINDOW_BIND | spec;
rw_enter(&hca->state_lock, RW_READER);
@@ -3751,58 +4304,160 @@
}
return (RDMA_SUCCESS);
}
rdma_stat
-rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
+rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
+#ifdef SERVER_REG_CACHE
+ struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc)
+#else
struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle)
+#endif
{
ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */
+#ifdef IB_FMR_SUP
+ ibt_pmr_desc_t pmr_desc; /* vaddr, lkey, rkey */
+ ibt_ma_hdl_t ma_hdl = NULL;
+#endif
+#ifdef SERVER_REG_CACHE
+ rib_lrc_entry_t *l;
+#endif
ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */
rdma_stat status;
rib_hca_t *hca = (ctoqp(conn))->hca;
/*
* Non-coherent memory registration.
*/
- status = rib_reg_mem(hca, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
+#ifdef SERVER_REG_CACHE
+	l = (rib_lrc_entry_t *)lrc;
+	if (l != NULL) {
+		if (l->registered) {
+			buf_handle->mrc_linfo =
+			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
+			buf_handle->mrc_lmr =
+			    (uint32_t)l->lrc_mhandle.mrc_lmr;
+			buf_handle->mrc_rmr =
+			    (uint32_t)l->lrc_mhandle.mrc_rmr;
+#ifdef IB_FMR_SUP
+			buf_handle->mrc_lma =
+			    (uintptr_t)l->lrc_mhandle.mrc_lma;
+#endif
+			*sync_handle = (RIB_SYNCMEM_HANDLE)
+			    l->lrc_mhandle.mrc_linfo;
+			return (RDMA_SUCCESS);
+		} else {
+			/* Always register the whole buffer */
+			buf = (caddr_t)l->lrc_buf;
+			buflen = l->lrc_len;
+		}
+	}
+#endif
+#ifdef IB_FMR_SUP
+	status = rib_reg_mem_fmr(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT,
+	    &mr_hdl, &ma_hdl, &pmr_desc);
+	if (status == RDMA_SUCCESS) {
+		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+		buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+		buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+		buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
+#ifdef SERVER_REG_CACHE
+		if (l) {
+			l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
+			/* cache the FMR keys, not the unset mr_desc ones */
+			l->lrc_mhandle.mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+			l->lrc_mhandle.mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+			l->registered = TRUE;
+			l->lrc_mhandle.mrc_lma = (uintptr_t)ma_hdl;
+		}
+#endif
+		goto ret_stat;
+	} else {
+		if (rib_debug > 1)
+			cmn_err(CE_WARN, "fmr reg failed for buffer %p of "
+			    "length %d\n", (void *)buf, buflen);
+		buf_handle->mrc_linfo = NULL;
+		buf_handle->mrc_lma = NULL;
+		buf_handle->mrc_lmr = 0;
+		buf_handle->mrc_rmr = 0;
+	}
+#endif
+ status = rib_reg_mem(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
&mr_desc);
if (status == RDMA_SUCCESS) {
+#ifdef SERVER_REG_CACHE
+ if(l){
+ l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
+ l->lrc_mhandle.mrc_lmr = (uint32_t)mr_desc.md_lkey;
+ l->lrc_mhandle.mrc_rmr = (uint32_t)mr_desc.md_rkey;
+ l->registered = TRUE;
+ }
+#endif
buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
} else {
buf_handle->mrc_linfo = NULL;
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
+ret_stat:
return (status);
}
/* ARGSUSED */
rdma_stat
rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
{
+#ifdef IB_FMR_SUP
+ ibt_status_t ibt_status;
+#endif
rib_hca_t *hca = (ctoqp(conn))->hca;
-
/*
* Allow memory deregistration even if HCA is
* getting detached. Need all outstanding
* memory registrations to be deregistered
* before HCA_DETACH_EVENT can be accepted.
*/
+#ifdef IB_FMR_SUP
+	if (buf_handle.mrc_lma) {
+		ibt_status = ibt_unmap_mem_area(hca->hca_hdl,
+		    (ibt_ma_hdl_t)buf_handle.mrc_lma);
+		if (ibt_status != IBT_SUCCESS) {
+			cmn_err(CE_WARN, "rib_deregistermem: "
+			    "ibt_unmap_mem_area failed: %d", ibt_status);
+			return (RDMA_FAILED);
+		}
+
+		ibt_status = ibt_deregister_fmr(hca->hca_hdl,
+		    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+		if (ibt_status != IBT_SUCCESS)
+			return (RDMA_FAILED);
+		return (RDMA_SUCCESS);
+	}
+#endif
(void) ibt_deregister_mr(hca->hca_hdl,
(ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
return (RDMA_SUCCESS);
}
/* ARGSUSED */
rdma_stat
rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
+#ifdef SERVER_REG_CACHE
+ RIB_SYNCMEM_HANDLE sync_handle, void *lrc)
+#else
RIB_SYNCMEM_HANDLE sync_handle)
+#endif
{
+#ifdef SERVER_REG_CACHE
+	rib_lrc_entry_t *l = (rib_lrc_entry_t *)lrc;
+
+	/* Cached buffers stay registered; skip the deregistration. */
+	if (l != NULL && l->registered)
+		return (RDMA_SUCCESS);
+#endif
+
(void) rib_deregistermem(conn, buf, buf_handle);
return (RDMA_SUCCESS);
}
@@ -3877,19 +4532,18 @@
num * sizeof (void *), KM_SLEEP);
mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
bp->numelems = num;
+
switch (ptype) {
case SEND_BUFFER:
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
- /* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
bp->rsize = RPC_MSG_SZ;
break;
case RECV_BUFFER:
mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
- /* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
bp->rsize = RPC_BUF_SIZE;
break;
default:
goto fail;
}
@@ -3901,14 +4555,14 @@
bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
sizeof (ibt_mr_hdl_t), KM_SLEEP);
rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
sizeof (ibt_mr_desc_t), KM_SLEEP);
-
rw_enter(&hca->state_lock, RW_READER);
if (hca->state != HCA_INITED) {
rw_exit(&hca->state_lock);
+ cmn_err(CE_WARN,"hca->state != HCA_INITED");
goto fail;
}
for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
mem_attr.mr_vaddr = (uintptr_t)buf;
@@ -3924,11 +4578,10 @@
rw_exit(&hca->state_lock);
goto fail;
}
}
rw_exit(&hca->state_lock);
-
buf = (caddr_t)bp->buf;
for (i = 0; i < num; i++, buf += bp->rsize) {
bp->buflist[i] = (void *)buf;
}
bp->buffree = num - 1; /* no. of free buffers */
@@ -4015,11 +4668,10 @@
if (rbp->mr_hdl)
kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
if (rbp->mr_desc)
kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
-
if (bp->buf)
kmem_free(bp->buf, bp->bufsize);
mutex_destroy(&bp->buflock);
kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
kmem_free(rbp, sizeof (rib_bufpool_t));
@@ -4057,10 +4709,18 @@
return (RDMA_SUCCESS);
} else
return (RDMA_FAILED);
}
+#if defined(MEASURE_POOL_DEPTH)
+/*
+ * Empty hooks, like rib_posted_rbufs() above, presumably intended as
+ * probe points for observing send/recv buffer pool usage.
+ */
+/* ARGSUSED */
+static void rib_recv_bufs(uint32_t x) { return; }
+/* ARGSUSED */
+static void rib_send_bufs(uint32_t x) { return; }
+#endif
/*
* Fetch a buffer of specified type.
* Note that rdbuf->handle is mw's rkey.
*/
@@ -4107,10 +4767,16 @@
for (i = bp->numelems - 1; i >= 0; i--) {
if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
rdbuf->handle.mrc_rmr = (uint32_t)rbp->mr_desc[i].md_rkey;
rdbuf->handle.mrc_linfo = (uintptr_t)rbp->mr_hdl[i];
rdbuf->handle.mrc_lmr = (uint32_t)rbp->mr_desc[i].md_lkey;
+#if defined(MEASURE_POOL_DEPTH)
+		if (ptype == SEND_BUFFER)
+			rib_send_bufs(MAX_BUFS - (bp->buffree + 1));
+		if (ptype == RECV_BUFFER)
+			rib_recv_bufs(MAX_BUFS - (bp->buffree + 1));
+#endif
bp->buffree--;
if (rib_debug > 1)
cmn_err(CE_NOTE, "rib_rbuf_alloc: %d free bufs "
"(type %d)\n", bp->buffree+1, ptype);
@@ -4958,10 +5624,13 @@
* conn_lists are NULL, so destroy
* buffers, close hca and be done.
*/
rib_rbufpool_destroy(hca, RECV_BUFFER);
rib_rbufpool_destroy(hca, SEND_BUFFER);
+#ifdef SERVER_REG_CACHE
+ rib_destroy_cache(hca);
+#endif
(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
(void) ibt_close_hca(hca->hca_hdl);
hca->hca_hdl = NULL;
}
rw_exit(&hca->cl_conn_list.conn_lock);
@@ -4981,5 +5650,332 @@
(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
(void) ibt_close_hca(hca->hca_hdl);
hca->hca_hdl = NULL;
}
}
+
+#ifdef SERVER_REG_CACHE
+
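+/*
+ * kmem reclaim callback, registered with the server_side_cache kmem
+ * cache in open_hcas(): when the system is low on memory it empties the
+ * whole cache, deregistering and freeing every cached buffer.
+ */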
+static void
+rib_server_side_cache_reclaim(void *argp)
+{
+	cache_avl_struct_t *rcas;
+	rib_lrc_entry_t *rb;
+	rib_hca_t *hca = (rib_hca_t *)argp;
+
+	rw_enter(&hca->avl_rw_lock, RW_WRITER);
+	rcas = avl_first(&hca->avl_tree);
+	if (rcas != NULL)
+		avl_remove(&hca->avl_tree, rcas);
+	while (rcas != NULL) {
+		while (rcas->r.forw != &rcas->r) {
+			rcas->elements--;
+			rb = rcas->r.forw;
+			remque(rb);
+			(void) rib_deregistermem_via_hca(hca, rb->lrc_buf,
+			    rb->lrc_mhandle);
+			kmem_free(rb->lrc_buf, rb->lrc_len);
+			kmem_free(rb, sizeof (rib_lrc_entry_t));
+		}
+		mutex_destroy(&rcas->node_lock);
+		kmem_cache_free(hca->server_side_cache, rcas);
+		rcas = avl_first(&hca->avl_tree);
+		if (rcas != NULL)
+			avl_remove(&hca->avl_tree, rcas);
+	}
+	rw_exit(&hca->avl_rw_lock);
+}
+
+static int
+avl_compare(const void *t1, const void *t2)
+{
+	uint32_t l1 = ((cache_avl_struct_t *)t1)->len;
+	uint32_t l2 = ((cache_avl_struct_t *)t2)->len;
+
+	if (rib_debug > 1)
+		cmn_err(CE_NOTE, "Comparing %d and %d\n", l1, l2);
+	if (l1 == l2)
+		return (0);
+	return (l1 < l2 ? -1 : 1);
+}
+
+static void
+rib_destroy_cache(rib_hca_t *hca)
+{
+	hca->avl_init = FALSE;
+	kmem_cache_destroy(hca->server_side_cache);
+	avl_destroy(&hca->avl_tree);
+	rw_destroy(&hca->avl_rw_lock);
+}
+
+static rib_lrc_entry_t *
+rib_get_server_cache_buf(CONN *conn, uint32_t len)
+{
+	cache_avl_struct_t cas, *rcas;
+	rib_hca_t *hca = (ctoqp(conn))->hca;
+	rib_lrc_entry_t *reply_buf;
+	avl_index_t where = NULL;
+
+	if (!hca->avl_init)
+		goto error_alloc;
+
+	cas.len = len;
+	rw_enter(&hca->avl_rw_lock, RW_READER);
+	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas,
+	    &where)) == NULL) {
+		rw_exit(&hca->avl_rw_lock);
+		rw_enter(&hca->avl_rw_lock, RW_WRITER);
+		/* Recheck to make sure no other thread added the entry in */
+		if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
+		    &cas, &where)) == NULL) {
+			/* Allocate an avl tree entry */
+			if (rib_debug > 1)
+				cmn_err(CE_NOTE, "Allocating an avl entry "
+				    "for length %d\n", len);
+			rcas = (cache_avl_struct_t *)kmem_cache_alloc(
+			    hca->server_side_cache, KM_SLEEP);
+			bzero(rcas, sizeof (cache_avl_struct_t));
+			rcas->elements = 0;
+			rcas->r.forw = &rcas->r;
+			rcas->r.back = &rcas->r;
+			rcas->len = len;
+			mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT,
+			    NULL);
+			avl_insert(&hca->avl_tree, rcas, where);
+		}
+	}
+
+	if (rcas->elements > 0) {
+		mutex_enter(&rcas->node_lock);
+		reply_buf = rcas->r.forw;
+		remque(reply_buf);
+		rcas->elements--;
+		mutex_exit(&rcas->node_lock);
+		rw_exit(&hca->avl_rw_lock);
+		if (rib_debug > 1)
+			cmn_err(CE_NOTE, "Allocating a pre-alloced buffer "
+			    "for length %d\n", len);
+	} else {
+		rw_exit(&hca->avl_rw_lock);
+		rib_total_buffers++;
+		if (rib_debug > 1)
+			cmn_err(CE_NOTE, "Allocating a new buffer for "
+			    "length %d\n", len);
+		/* Allocate a reply_buf entry */
+		reply_buf = (rib_lrc_entry_t *)kmem_zalloc(
+		    sizeof (rib_lrc_entry_t), KM_SLEEP);
+		reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
+		reply_buf->lrc_len = len;
+		reply_buf->registered = FALSE;
+		reply_buf->avl_node = (void *)rcas;
+	}
+
+	return (reply_buf);
+
+error_alloc:
+	reply_buf = (rib_lrc_entry_t *)kmem_zalloc(sizeof (rib_lrc_entry_t),
+	    KM_SLEEP);
+	reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
+	reply_buf->lrc_len = len;
+	reply_buf->registered = FALSE;
+	reply_buf->avl_node = NULL;
+	return (reply_buf);
+}
+
+/*
+ * Return a pre-registered buffer back to the cache (without
+ * deregistering it).
+ */
+static void
+rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
+{
+	cache_avl_struct_t cas, *rcas;
+	avl_index_t where = NULL;
+	rib_hca_t *hca = (ctoqp(conn))->hca;
+
+	if (reg_buf == NULL) {
+		cmn_err(CE_WARN, "rib_free_server_cache_buf: NULL reg_buf\n");
+		return;
+	}
+	if (!hca->avl_init)
+		goto error_free;
+
+	cas.len = reg_buf->lrc_len;
+	rw_enter(&hca->avl_rw_lock, RW_READER);
+	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas,
+	    &where)) == NULL) {
+		rw_exit(&hca->avl_rw_lock);
+		goto error_free;
+	} else {
+		mutex_enter(&rcas->node_lock);
+		insque(reg_buf, &rcas->r);
+		rcas->elements++;
+		mutex_exit(&rcas->node_lock);
+		rw_exit(&hca->avl_rw_lock);
+		if (rib_debug > 1)
+			cmn_err(CE_NOTE, "Returning buffer for length %d\n",
+			    reg_buf->lrc_len);
+	}
+	return;
+
+error_free:
+	(void) rib_deregistermem_via_hca(hca, reg_buf->lrc_buf,
+	    reg_buf->lrc_mhandle);
+	kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
+	kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
+}
+
+#endif
+
+static rdma_stat
+rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
+	uint_t buflen, struct mrc *buf_handle)
+{
+	ibt_mr_hdl_t mr_hdl = NULL;	/* memory region handle */
+#ifdef IB_FMR_SUP
+	ibt_pmr_desc_t pmr_desc;	/* vaddr, lkey, rkey */
+	ibt_ma_hdl_t ma_hdl = NULL;
+#endif
+	ibt_mr_desc_t mr_desc;		/* vaddr, lkey, rkey */
+	rdma_stat status;
+
+	/*
+	 * Note: ALL buffer pools use the same memory type RDMARW.
+	 */
+	/*
+	 * This code will not be activated on the server. We could remove
+	 * the call to rib_reg_mem_fmr(), but leave it in, in case the FMR
+	 * bugs get fixed. The bigger question is whether we need FMR when
+	 * the registered buffers are coming out of a slab cache. This
+	 * needs to be evaluated.
+	 */
+#ifdef IB_FMR_SUP
+	status = rib_reg_mem_fmr(hca, adsp, buf, buflen, 0, &mr_hdl,
+	    &ma_hdl, &pmr_desc);
+	if (status == RDMA_SUCCESS) {
+		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+		buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+		buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+		buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+		goto ret_stat;
+	} else {
+		buf_handle->mrc_linfo = NULL;
+		buf_handle->mrc_lma = NULL;
+		buf_handle->mrc_lmr = 0;
+		buf_handle->mrc_rmr = 0;
+	}
+#endif
+	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
+	if (status == RDMA_SUCCESS) {
+		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
+		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
+	} else {
+		buf_handle->mrc_linfo = NULL;
+		buf_handle->mrc_lmr = 0;
+		buf_handle->mrc_rmr = 0;
+	}
+ret_stat:
+	return (status);
+}
+
+/* ARGSUSED */
+static rdma_stat
+rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf,
+	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle)
+{
+	(void) rib_deregistermem_via_hca(hca, buf, buf_handle);
+	return (RDMA_SUCCESS);
+}
+
+/* ARGSUSED */
+static rdma_stat
+rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
+{
+#ifdef IB_FMR_SUP
+	ibt_status_t ibt_status;
+
+	if (buf_handle.mrc_lma) {
+		ibt_status = ibt_unmap_mem_area(hca->hca_hdl,
+		    (ibt_ma_hdl_t)buf_handle.mrc_lma);
+		if (ibt_status != IBT_SUCCESS) {
+			cmn_err(CE_WARN, "rib_deregistermem_via_hca: "
+			    "ibt_unmap_mem_area failed: %d", ibt_status);
+			return (RDMA_FAILED);
+		}
+		ibt_status = ibt_deregister_fmr(hca->hca_hdl,
+		    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+		if (ibt_status != IBT_SUCCESS) {
+			cmn_err(CE_WARN, "rib_deregistermem_via_hca: "
+			    "ibt_deregister_fmr failed: %d", ibt_status);
+			return (RDMA_FAILED);
+		}
+		return (RDMA_SUCCESS);
+	}
+#endif
+
+	(void) ibt_deregister_mr(hca->hca_hdl,
+	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+	return (RDMA_SUCCESS);
+}
+
+#if defined(ASYNC_SERVER_DEREG)||defined(ASYNC_CLIENT_DEREG)
+static int
+clist_deregister1(CONN *conn, struct clist *cl, bool_t src)
+{
+ struct clist *c;
+
+ for (c = cl; c; c = c->c_next) {
+ if (src) {
+ if (c->c_smemhandle.mrc_rmr != 0) {
+ (void) RDMA_DEREGMEMSYNC(conn,
+ (caddr_t)(uintptr_t)c->c_saddr,
+ c->c_smemhandle,
+#ifdef SERVER_REG_CACHE
+ (void *)(uintptr_t)c->c_ssynchandle, (void *)c->long_reply_buf);
+#else
+ (void *)(uintptr_t)c->c_ssynchandle);
+#endif
+ c->c_smemhandle.mrc_rmr = 0;
+ c->c_ssynchandle = NULL;
+ }
+ } else {
+ if (c->c_dmemhandle.mrc_rmr != 0) {
+ (void) RDMA_DEREGMEMSYNC(conn,
+ (caddr_t)(uintptr_t)c->c_daddr,
+ c->c_dmemhandle,
+#ifdef SERVER_REG_CACHE
+ (void *)(uintptr_t)c->c_dsynchandle, (void *)c->long_reply_buf);
+#else
+ (void *)(uintptr_t)c->c_dsynchandle);
+#endif
+ c->c_dmemhandle.mrc_rmr = 0;
+ c->c_dsynchandle = NULL;
+ }
+ }
+ }
+
+ return (RDMA_SUCCESS);
+}
+#endif
+
+
+
+#if defined(ASYNC_CLIENT_DEREG)
+/* ARGSUSED */
+static void
+async_dereg_thread(caddr_t arg)
+{
+	ASYNC *r;
+
+	for (;;) {
+		mutex_enter(&at_mutex);
+		while (rqueue.forw == &rqueue)
+			cv_wait(&at_cond, &at_mutex);
+		r = rqueue.forw;
+		remque(rqueue.forw);
+		mutex_exit(&at_mutex);
+		/* Process deregistration */
+		(void) clist_deregister1(&r->c_conn, &r->c_clist, FALSE);
+		kmem_free(r, sizeof (ASYNC));
+	}
+}
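+/*
+ * insert_queue() copies both the CONN and the clist by value into the
+ * ASYNC record, so the caller's structures need not remain valid after
+ * the call; the worker thread deregisters against the copied CONN.
+ */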
+void
+insert_queue(CONN *conn, struct clist *rwc)
+{
+	ASYNC *r;
+
+	r = kmem_zalloc(sizeof (ASYNC), KM_SLEEP);
+	r->c_clist = *rwc;
+	r->c_conn = *conn;
+	mutex_enter(&at_mutex);
+	insque(r, &rqueue);
+	cv_broadcast(&at_cond);
+	mutex_exit(&at_mutex);
+}
+#endif