Udiff rpcib.c
--- /webrev/webrev/usr/src/uts/common/rpc/rpcib.c-      Mon Aug 14 13:12:11 2006
+++ rpcib.c     Thu Aug 10 14:05:27 2006
@@ -22,10 +22,24 @@
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
+
+/*
+ * Copyright (c) 2006, The Ohio State University. All rights reserved.
+ *
+ * Portions of this source code were developed by the team members of
+ * The Ohio State University's Network-Based Computing Laboratory (NBCL),
+ * headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * Acknowledgements to contributions from developers:
+ *   Ranjit Noronha: noronha@cse.ohio-state.edu
+ *   Lei Chai      : chail@cse.ohio-state.edu
+ *   Weikuan Yu    : yuw@cse.ohio-state.edu
+ */
+
 #pragma ident  "@(#)rpcib.c    1.29    06/01/25 SMI"
 
 /*
  * The rpcib plugin. Implements the interface for RDMATF's
  * interaction with IBTF.
@@ -55,10 +69,12 @@
 #include <sys/isa_defs.h>
 #include <sys/callb.h>
 #include <sys/sunddi.h>
 #include <sys/sunndi.h>
 
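+/*
+ * Compile-time feature switches, left disabled below: IB_FMR_SUP
+ * switches memory registration over to IBTF fast memory regions
+ * (FMRs); CLNT_POLL_CQ makes the client poll its receive CQ instead
+ * of re-arming completion notifications.
+ */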
+/* #define IB_FMR_SUP */
+/* #define CLNT_POLL_CQ */
 #include <sys/ib/ibtl/ibti.h>
 #include <rpc/rpc.h>
 #include <rpc/ib.h>
 
 #include <sys/modctl.h>
@@ -68,11 +84,14 @@
 #include <sys/sockio.h>
 #include <sys/vnode.h>
 #include <sys/tiuser.h>
 #include <net/if.h>
 #include <sys/cred.h>
+#include <rpc/rpc_rdma.h>
 
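+/*
+ * num_clients counts server-side channels (incremented in
+ * rib_svc_create_chan(), decremented on QP cleanup); it is reported,
+ * together with the free receive-buffer count, by
+ * rib_get_resource_info() under DYNAMIC_CREDIT_CONTROL, which also
+ * sets is_server.
+ */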
+int num_clients = 0;
+volatile uint32_t is_server = 0;
 
 extern char *inet_ntop(int, const void *, char *, int);
 
 
 /*
@@ -105,10 +124,13 @@
        CB_REV,                 /* rev */
        nodev,                  /* int (*cb_aread)() */
        nodev                   /* int (*cb_awrite)() */
 };
 
 /*
  * Device options
  */
 static struct dev_ops rpcib_ops = {
        DEVO_REV,               /* devo_rev, */
@@ -138,21 +160,41 @@
        MODREV_1,
        (void *)&rib_modldrv,
        NULL
 };
 
+#ifdef SERVER_REG_CACHE
+typedef struct cache_struct {
+	avl_node_t	avl_link;
+	rib_lrc_entry_t	r;		/* head of the free-buffer list */
+	uint32_t	len;		/* buffer length; the AVL sort key */
+	uint32_t	elements;	/* free buffers queued at r */
+	kmutex_t	node_lock;
+} cache_avl_struct_t;
+
+int rib_total_buffers = 0;
+#endif
 /*
  * rib_stat: private data pointer used when registering
  *     with the IBTF.  It is returned to the consumer
  *     in all callbacks.
  */
 static rpcib_state_t *rib_stat = NULL;
 
-#define        RNR_RETRIES     2
+#define        RNR_RETRIES     IBT_RNR_INFINITE_RETRY
 #define        MAX_PORTS       2
 
-int preposted_rbufs = 16;
+#ifdef IB_FMR_SUP
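+/*
+ * IB_FMR_DIRTY_MARK is the FMR pool's dirty watermark (unmapped
+ * entries that may accumulate before the pool is flushed);
+ * IB_FMR_MAX_SIZE caps the bytes covered by one FMR registration and
+ * sizes fmr_max_pages_per_fmr in open_hcas().
+ */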
+#define IB_FMR_DIRTY_MARK       32
+#define IB_FMR_MAX_SIZE         1048576
+/* #define IB_FMR_MAX_SIZE      32768 */
+#endif
+
+int preposted_rbufs = RDMA_BUFS_GRANT;
 int send_threshold = 1;
 
 /*
  * State of the plugin.
  * ACCEPT = accepting new connections and requests.
@@ -167,22 +209,37 @@
 
 
 /*
  * RPCIB RDMATF operations
  */
+#if defined(MEASURE_POOL_DEPTH)
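+/*
+ * No-op hook, presumably kept as a probe point (e.g., for DTrace fbt)
+ * to observe how many preposted receive buffers are in use.
+ */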
+static void rib_posted_rbufs(uint32_t x) { return;}
+#endif
 static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
 static rdma_stat rib_disconnect(CONN *conn);
 static void rib_listen(struct rdma_svc_data *rd);
 static void rib_listen_stop(struct rdma_svc_data *rd);
-static rdma_stat rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
+static rdma_stat rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
        struct mrc *buf_handle);
 static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
        struct mrc buf_handle);
-static rdma_stat rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
+static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
+	caddr_t buf, uint_t buflen, struct mrc *buf_handle);
+static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
+	struct mrc buf_handle);
+#ifdef SERVER_REG_CACHE
+static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf,
+	uint_t buflen, struct mrc *buf_handle,
+	RIB_SYNCMEM_HANDLE *sync_handle, void *lrc);
+static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
+       struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
+#else
+static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
        struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle);
 static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
        struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle);
+#endif
 static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
        caddr_t buf, int len, int cpu);
 
 static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
 
@@ -190,10 +247,20 @@
 static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
 
 static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
 
 static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
+#if defined(CLNT_INTERRUPT_COAL)
+static void rib_scq_free(caddr_t);
+static rdma_stat rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid);
+#endif
+#if defined(ASYNC_SERVER_DEREG)
+static rdma_stat rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid,
+	caddr_t, caddr_t, int, caddr_t, int, int, int);
+#endif
+#if defined(ASYNC_CLIENT_DEREG)
+static void insert_queue(CONN  *conn, struct clist  *rwc);
+#endif
 static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
 static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
 static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
 static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
 static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
@@ -200,10 +267,23 @@
 static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
 static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **);
 static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
 static rdma_stat rib_conn_release(CONN *conn);
 static rdma_stat rib_getinfo(rdma_info_t *info);
+#ifdef DYNAMIC_CREDIT_CONTROL
+void rib_get_resource_info(CONN *, int *, int *);
+#endif
+
+#ifdef SERVER_REG_CACHE
+static rib_lrc_entry_t *rib_get_server_cache_buf(CONN *conn, uint32_t len);
+static void rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
+static void rib_destroy_cache(rib_hca_t *hca);
+static void rib_server_side_cache_reclaim(void *argp);
+static int avl_compare(const void *t1, const void *t2);
+#endif
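+/*
+ * Server registration-cache flow: callers take a long-reply buffer
+ * with rib_get_server_cache_buf(), it is registered at most once via
+ * rib_registermemsync(..., lrc), and returned, still registered, by
+ * rib_free_server_cache_buf(); buffers are only deregistered in the
+ * kmem reclaim callback rib_server_side_cache_reclaim().
+ */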
+
 static rdma_stat rib_register_ats(rib_hca_t *);
 static void rib_deregister_ats();
 static void rib_stop_services(rib_hca_t *);
 
 /*
@@ -213,12 +293,27 @@
 int get_interfaces(TIUSER *tiptr, int *num);
 int find_addrs(TIUSER *tiptr, char **addrs, int num_ifs);
 int get_ibd_ipaddr(rpcib_ibd_insts_t *);
 rpcib_ats_t *get_ibd_entry(ib_gid_t *, ib_pkey_t, rpcib_ibd_insts_t *);
 void rib_get_ibd_insts(rpcib_ibd_insts_t *);
+#if defined(ASYNC_SERVER_DEREG)||defined(ASYNC_CLIENT_DEREG)
+static int clist_deregister1(CONN *, struct clist *, bool_t);
+#endif
 
-
+#if defined(ASYNC_CLIENT_DEREG)
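+/*
+ * Deferred client-side deregistration: insert_queue() copies the CONN
+ * and clist by value onto the doubly-linked rqueue (protected by
+ * at_mutex/at_cond), and the dedicated async_dereg_thread() drains
+ * the queue and deregisters the chunks.
+ */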
+typedef struct async_dereg {
+	struct async_dereg	*forw;
+	struct async_dereg	*back;
+	CONN			c_conn;
+	struct clist		c_clist;
+} ASYNC;
+static void async_dereg_thread(caddr_t arg);
+extern pri_t		minclsyspri;	/* priority for taskq */
+static ASYNC		rqueue;
+static kmutex_t		at_mutex;
+static kcondvar_t	at_cond;
+#endif
 /*
  * RDMA operations the RPCIB module exports
  */
 static rdmaops_t rib_ops = {
        rib_reachable,
@@ -232,17 +327,33 @@
        rib_deregistermemsync,
        rib_syncmem,
        rib_reg_buf_alloc,
        rib_reg_buf_free,
        rib_send,
+#if defined(CLNT_INTERRUPT_COAL)
+       rib_send_bl,
+#endif
+#if defined(ASYNC_SERVER_DEREG)
+       rib_send_nw,
+#endif
        rib_send_resp,
        rib_post_resp,
        rib_post_recv,
        rib_recv,
        rib_read,
        rib_write,
-       rib_getinfo
+       rib_getinfo,
+#ifdef SERVER_REG_CACHE
+       rib_get_server_cache_buf,
+       rib_free_server_cache_buf,
+#endif
+#ifdef DYNAMIC_CREDIT_CONTROL
+	rib_get_resource_info,
+#endif
+#if defined(ASYNC_CLIENT_DEREG)
+       insert_queue,
+#endif
 };
 
 /*
  * RDMATF RPCIB plugin details
  */
@@ -258,12 +369,18 @@
 static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
 static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
 static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
 static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
 static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
-static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
+#ifdef IB_FMR_SUP
+static rdma_stat rib_reg_mem_fmr(rib_hca_t *, caddr_t adsp, caddr_t, uint_t,
+	ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_ma_hdl_t *, ibt_pmr_desc_t *);
+#endif
+static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t, ibt_mr_flags_t,
        ibt_mr_hdl_t *, ibt_mr_desc_t *);
+static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
+       ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
 static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *);
 static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
        rib_qp_t **);
 static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
        rib_qp_t **);
@@ -314,12 +431,16 @@
  * debugging in rpcib kernel module.
  * Set it to values greater that 1 to control
  * the amount of debugging messages required.
  */
 int rib_debug = 0;
-
+#if defined(CLNT_POLL_CQ)
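+/*
+ * Number of consecutive empty polls of the client receive CQ before
+ * the handler re-arms completion notification and returns; see
+ * rib_clnt_rcq_handler().
+ */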
+int max_poll_count = 500;
+#endif
 static int ats_running = 0;
 int
 _init(void)
 {
        int             error;
 
@@ -571,10 +692,11 @@
 static rdma_stat rib_rem_replylist(rib_qp_t *);
 static int rib_remreply(rib_qp_t *, struct reply *);
 static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
 static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
 
+
 /*
  * One CQ pair per HCA
  */
 static rdma_stat
 rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
@@ -631,11 +753,14 @@
        rdma_stat               status;
        ibt_hca_portinfo_t      *pinfop;
        ibt_pd_flags_t          pd_flags = IBT_PD_NO_FLAGS;
        uint_t                  size, cq_size;
        int                     i;
-
+#ifdef IB_FMR_SUP
+	ibt_fmr_pool_attr_t	fmr_attr;
+	uint_t			h_page_sz;
+#endif
        ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
        if (ribstat->hcas == NULL)
                ribstat->hcas = kmem_zalloc(ribstat->hca_count *
                                    sizeof (rib_hca_t), KM_SLEEP);
 
@@ -744,11 +869,75 @@
                if (hca->send_pool == NULL) {
                        cmn_err(CE_WARN, "open_hcas: send buf pool failed\n");
                        rib_rbufpool_destroy(hca, RECV_BUFFER);
                        goto fail3;
                }
+#ifdef  IB_FMR_SUP
+                /* Global FMR POOL */
+                bzero(&fmr_attr, sizeof (ibt_fmr_pool_attr_t));
 
+                h_page_sz = hca->hca_attrs.hca_page_sz * 1024;
+
+                fmr_attr.fmr_max_pages_per_fmr =
+                    (IB_FMR_MAX_SIZE / h_page_sz) + 2;
+                fmr_attr.fmr_pool_size = MAX_BUFS * 2;
+                fmr_attr.fmr_dirty_watermark = IB_FMR_DIRTY_MARK;
+                fmr_attr.fmr_page_sz = h_page_sz;
+                fmr_attr.fmr_cache = B_FALSE;
+                fmr_attr.fmr_flags = IBT_MR_SLEEP |
+                    IBT_MR_ENABLE_LOCAL_WRITE |
+                    IBT_MR_ENABLE_REMOTE_READ |
+                    IBT_MR_ENABLE_REMOTE_WRITE;
+                fmr_attr.fmr_func_hdlr = NULL;
+
+                if (rib_debug > 1) {
+                        cmn_err(CE_NOTE, "open_hcas: ibt_create_fmr_pool:");
+                        cmn_err(CE_NOTE, "fmr_page_sz %d, fmr_pool_sz %d, "
+                            "max_pages_per_fmr %d", fmr_attr.fmr_page_sz,
+                            fmr_attr.fmr_pool_size,
+                            fmr_attr.fmr_max_pages_per_fmr);
+                }
+
+                ibt_status = ibt_create_fmr_pool(hca->hca_hdl, hca->pd_hdl,
+                    &fmr_attr, &hca->fmr_pool);
+                if (ibt_status != IBT_SUCCESS) {
+                        cmn_err(CE_WARN, "open_hcas: Global FMR pool creation "
+                            "failed: %d\n", ibt_status);
+                        rib_rbufpool_destroy(hca, RECV_BUFFER);
+                        rib_rbufpool_destroy(hca, SEND_BUFFER);
+                        goto fail3;
+                }
+#endif
+#ifdef SERVER_REG_CACHE
+		cmn_err(CE_NOTE, "Registration Cache enabled\n");
+		{
+		cache_avl_struct_t my_avl_node;
+
+		hca->server_side_cache = kmem_cache_create(
+		    "rib_server_side_cache",
+		    sizeof (cache_avl_struct_t), 0,
+		    NULL, NULL,
+		    rib_server_side_cache_reclaim,
+		    hca, NULL, 0);
+		/*
+		 * The avl_link offset is computed with uintptr_t casts;
+		 * the old (uint_t) casts drew pointer-truncation
+		 * warnings on 64-bit.
+		 */
+		avl_create(&hca->avl_tree, avl_compare,
+		    sizeof (cache_avl_struct_t),
+		    (size_t)((uintptr_t)&my_avl_node.avl_link -
+		    (uintptr_t)&my_avl_node));
+		rw_init(&hca->avl_rw_lock, NULL, RW_DRIVER, hca->iblock);
+		hca->avl_init = TRUE;
+		}
+#endif
+
+#if defined(ASYNC_CLIENT_DEREG)
+		rqueue.forw = rqueue.back = &rqueue;
+		mutex_init(&at_mutex, NULL, MUTEX_DEFAULT, NULL);
+		cv_init(&at_cond, NULL, CV_DEFAULT, NULL);
+		(void) thread_create(NULL, 0, async_dereg_thread, NULL, 0,
+		    &p0, TS_RUN, minclsyspri);
+#endif
                /*
                 * Initialize the registered service list and
                 * the lock
                 */
                hca->service_list = NULL;
@@ -886,10 +1075,88 @@
                }
            }
        }
 }
 
+#if defined(CLNT_INTERRUPT_COAL)
+/*
+ * Complete a coalesced (unsignaled) send locally: the work request
+ * was posted without completion signaling, so no wc is ever reaped
+ * for it.  Treat it as successful, then notify the poster or free
+ * the send buffers and send_wid, mirroring rib_clnt_scq_handler().
+ */
+static void
+rib_scq_free(caddr_t widd)
+{
+	struct send_wid	*wd = (struct send_wid *)widd;
+	int		i;
+
+	mutex_enter(&wd->sendwait_lock);
+	wd->status = RDMA_SUCCESS;
+	if (wd->cv_sig == 1) {
+		/*
+		 * Notify poster
+		 */
+		cv_signal(&wd->wait_cv);
+		mutex_exit(&wd->sendwait_lock);
+	} else {
+		/*
+		 * Poster not waiting for notification.
+		 * Free the send buffers and send_wid
+		 */
+		for (i = 0; i < wd->nsbufs; i++) {
+			rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
+			    (void *)(uintptr_t)wd->sbufaddr[i]);
+		}
+		mutex_exit(&wd->sendwait_lock);
+		(void) rib_free_sendwait(wd);
+	}
+}
+#endif
+
 /* ARGSUSED */
 static void
 rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
 {
        ibt_status_t    ibt_status;
@@ -919,11 +1186,32 @@
                        wc.wc_status, (longlong_t)wc.wc_id);
            }
 #endif
            if (wc.wc_id != NULL) { /* XXX NULL possible ???? */
                struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id;
-
+#ifdef ASYNC_SERVER_DEREG
+		if (wd->c1) {
+			(void) clist_deregister1((CONN *)wd->c,
+			    (struct clist *)wd->c1, TRUE);
+#ifdef SERVER_REG_CACHE
+			RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c,
+			    (rib_lrc_entry_t *)
+			    (((struct clist *)wd->c1)->long_reply_buf));
+#else
+			if (wd->l1)
+				kmem_free((void *)
+				    ((struct clist *)wd->c1)->c_saddr, wd->l1);
+#endif
+			kmem_free((void *)(wd->c1),
+			    wd->wl * sizeof (struct clist));
+		}
+		if (wd->c2) {
+			(void) clist_deregister1((CONN *)wd->c,
+			    (struct clist *)wd->c2, TRUE);
+#ifdef SERVER_REG_CACHE
+			RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c,
+			    (rib_lrc_entry_t *)
+			    (((struct clist *)wd->c2)->long_reply_buf));
+#else
+			if (wd->l2)
+				kmem_free((void *)
+				    ((struct clist *)wd->c2)->c_saddr, wd->l2);
+#endif
+			kmem_free((void *)(wd->c2),
+			    wd->rl * sizeof (struct clist));
+		}
+#endif
                mutex_enter(&wd->sendwait_lock);
                if (wd->cv_sig == 1) {
                        /*
                         * Update completion status and notify poster
                         */
@@ -958,31 +1246,57 @@
 {
        rib_qp_t        *qp;
        ibt_status_t    ibt_status;
        ibt_wc_t        wc;
        struct recv_wid *rwid;
+#if defined(CLNT_POLL_CQ)
+        uint32_t        count = 0;
+#endif
 
        /*
         * Re-enable cq notify here to avoid missing any
         * completion queue notification.
         */
+#if !defined(CLNT_POLL_CQ)
        (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
+#endif
 
        ibt_status = IBT_SUCCESS;
        while (ibt_status != IBT_CQ_EMPTY) {
+#if defined(CLNT_POLL_CQ)
+poll_cq_again:
+#endif
                bzero(&wc, sizeof (wc));
                ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
+#if defined(CLNT_POLL_CQ)
+		if (ibt_status == IBT_CQ_EMPTY) {
+			/*
+			 * Poll the empty CQ up to max_poll_count times
+			 * before re-arming notification and returning.
+			 */
+			if (++count == max_poll_count) {
+				(void) ibt_enable_cq_notify(cq_hdl,
+				    IBT_NEXT_COMPLETION);
+				return;
+			}
+			goto poll_cq_again;
+		}
+#endif
                if (ibt_status != IBT_SUCCESS)
+#if defined(CLNT_POLL_CQ)
+		{
+			(void) ibt_enable_cq_notify(cq_hdl,
+			    IBT_NEXT_COMPLETION);
+#endif
                    return;
-
+#if defined(CLNT_POLL_CQ)
+		}
+		count = 0;
+#endif
                rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
                qp = rwid->qp;
                if (wc.wc_status == IBT_WC_SUCCESS) {
                    XDR                 inxdrs, *xdrs;
                    uint_t              xid, vers, op, find_xid = 0;
                    struct reply        *r;
                    CONN *conn = qptoc(qp);
+                    uint32_t rdma_credit = 0;
 
                    xdrs = &inxdrs;
                    xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
                        wc.wc_bytes_xfer, XDR_DECODE);
                /*
@@ -991,10 +1305,11 @@
                 */
                    xid = *(uint32_t *)(uintptr_t)rwid->addr;
                /* Skip xid and set the xdr position accordingly. */
                    XDR_SETPOS(xdrs, sizeof (uint32_t));
                    (void) xdr_u_int(xdrs, &vers);
+                    (void) xdr_u_int(xdrs, &rdma_credit);
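+                    /*
+                     * The credit field just decoded sits between vers
+                     * and op in the RPC/RDMA header and carries the
+                     * server's receive-buffer grant; decoding it here
+                     * keeps the xdr position correct for op.
+                     */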
                    (void) xdr_u_int(xdrs, &op);
                    XDR_DESTROY(xdrs);
                    if (vers != RPCRDMA_VERS) {
                        /*
                         * Invalid RPC/RDMA version. Cannot interoperate.
@@ -1108,17 +1423,21 @@
                s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
                qp = s_recvp->qp;
                conn = qptoc(qp);
                mutex_enter(&qp->posted_rbufs_lock);
                qp->n_posted_rbufs--;
+#if defined(MEASURE_POOL_DEPTH)
+		rib_posted_rbufs(preposted_rbufs - qp->n_posted_rbufs);
+#endif
                if (qp->n_posted_rbufs == 0)
                        cv_signal(&qp->posted_rbufs_cv);
                mutex_exit(&qp->posted_rbufs_lock);
 
                if (wc.wc_status == IBT_WC_SUCCESS) {
                    XDR         inxdrs, *xdrs;
                    uint_t      xid, vers, op;
+                    uint32_t rdma_credit;
 
                    xdrs = &inxdrs;
                    /* s_recvp->vaddr stores data */
                    xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
                        wc.wc_bytes_xfer, XDR_DECODE);
@@ -1129,10 +1448,11 @@
                 */
                    xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
                /* Skip xid and set the xdr position accordingly. */
                    XDR_SETPOS(xdrs, sizeof (uint32_t));
                    if (!xdr_u_int(xdrs, &vers) ||
+                        !xdr_u_int(xdrs, &rdma_credit) ||
                        !xdr_u_int(xdrs, &op)) {
                        rib_rbuf_free(conn, RECV_BUFFER,
                                (void *)(uintptr_t)s_recvp->vaddr);
                        XDR_DESTROY(xdrs);
 #ifdef DEBUG
@@ -1338,10 +1658,11 @@
 static rdma_stat
 rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
 {
        rib_qp_t        *kqp = NULL;
        CONN            *conn;
+        rdma_clnt_cred_ctrl_t *cc_info;
 
        ASSERT(qp != NULL);
        *qp = NULL;
 
        kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
@@ -1365,10 +1686,25 @@
        mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
        mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
        mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
        cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
        mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
+#if defined(CLNT_INTERRUPT_COAL)
+	kqp->rdmaconn.c_count = 0;
+	conn->c_count = 0;
+	bzero(&kqp->wd, sizeof (struct send_wid));
+	kqp->wd.forw = kqp->wd.back = &kqp->wd;
+#endif
+	/*
+	 * Initialize the client credit control portion of the
+	 * rdmaconn struct: granted_ops tracks the send credit most
+	 * recently granted by the server, in_flight_ops the requests
+	 * outstanding against it, and clnt_cc_cv is the waiters' cv.
+	 */
+	kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT;
+	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
+	cc_info->clnt_cc_granted_ops = 0;
+	cc_info->clnt_cc_in_flight_ops = 0;
+	cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL);
 
        *qp = kqp;
        return (RDMA_SUCCESS);
 }
 
@@ -1378,10 +1714,11 @@
 {
        rib_qp_t        *kqp = NULL;
        ibt_chan_sizes_t        chan_sizes;
        ibt_rc_chan_alloc_args_t        qp_attr;
        ibt_status_t            ibt_status;
+        rdma_srv_cred_ctrl_t *cc_info;
 
        ASSERT(qp != NULL);
        *qp = NULL;
 
        kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
@@ -1439,11 +1776,24 @@
        /*
         * Set the private data area to qp to be used in callbacks
         */
        ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
        kqp->rdmaconn.c_state = C_CONNECTED;
+
+        /*
+         * Initialize the server credit control
+         * portion of the rdmaconn struct.
+         */
+        kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
+        cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
+        cc_info->srv_cc_buffers_granted = preposted_rbufs;
+        cc_info->srv_cc_cur_buffers_used = 0;
+        cc_info->srv_cc_posted = preposted_rbufs;
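+        /*
+         * A new client starts with one credit per preposted receive
+         * buffer (RDMA_BUFS_GRANT); the running grant is presumably
+         * what gets advertised back in the rdma_credit field of the
+         * reply headers.
+         */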
+
        *qp = kqp;
+
+        num_clients++;
        return (RDMA_SUCCESS);
 fail:
        if (kqp)
                kmem_free(kqp, sizeof (rib_qp_t));
 
@@ -1722,12 +2072,12 @@
        qp_attr.rc_flags = IBT_WR_SIGNALED;
 
        chan_args.oc_path = path;
        chan_args.oc_cm_handler = rib_clnt_cm_handler;
        chan_args.oc_cm_clnt_private = (void *)rib_stat;
-       chan_args.oc_rdma_ra_out = 1;
-       chan_args.oc_rdma_ra_in = 1;
+       chan_args.oc_rdma_ra_out = 4;
+       chan_args.oc_rdma_ra_in = 4;
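+	/*
+	 * The 4/4 above raises the outstanding-RDMA-read limits from
+	 * 1/1 and matches the cm_rdma_ra_out/in values advertised in
+	 * the server's CM reply.
+	 */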
        chan_args.oc_path_retry_cnt = 2;
        chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
 
 refresh:
        rw_enter(&hca->state_lock, RW_READER);
@@ -1900,10 +2250,20 @@
                kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
        }
        if (conn->c_laddr.buf != NULL) {
                kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
        }
+
+        /*
+         * Credit control cleanup.
+         */
+        if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
+                rdma_clnt_cred_ctrl_t *cc_info;
+                cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
+                cv_destroy(&cc_info->clnt_cc_cv);
+        }
+
        kmem_free(qp, sizeof (rib_qp_t));
 
        /*
         * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
         * then the hca is no longer being used.
@@ -1925,13 +2285,37 @@
                        }
                        rw_exit(&hca->srv_conn_list.conn_lock);
                }
                rw_exit(&hca->state_lock);
        }
+
+	num_clients--;	/* XXX also reached for client-side QPs */
        return (RDMA_SUCCESS);
 }
 
+#ifdef DYNAMIC_CREDIT_CONTROL
+void
+rib_get_resource_info(CONN *conn, int *current_clients, int *avail_bufs)
+{
+	rib_qp_t	*qp = ctoqp(conn);
+	rib_hca_t	*hca = qp->hca;
+	rib_bufpool_t	*rbp = NULL;
+	bufpool_t	*bp;
+
+	is_server = 1;
+	rbp = hca->recv_pool;
+
+	if (rbp == NULL) {
+		*avail_bufs = 0;
+	} else {
+		bp = rbp->bpool;
+		*avail_bufs = bp->buffree;
+	}
+
+	*current_clients = num_clients;
+}
+#endif
+
 /*
  * Wait for send completion notification. Only on receiving a
  * notification be it a successful or error completion, free the
  * send_wid.
  */
@@ -2062,12 +2446,17 @@
  * Send buffers are freed here only in case of error in posting
  * on QP. If the post succeeded, the send buffers are freed upon
  * send completion in rib_sendwait() or in the scq_handler.
  */
 rdma_stat
+#if defined(ASYNC_SERVER_DEREG)
 rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
-       int send_sig, int cv_sig)
+	int send_sig, int cv_sig, caddr_t c, caddr_t c1, int l1, caddr_t c2,
+	int l2, int l3, int l4, caddr_t *swid)
+#else
+rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
+       int send_sig, int cv_sig, caddr_t *swid)
+#endif
 {
        struct send_wid *wdesc;
        struct clist    *clp;
        ibt_status_t    ibt_status = IBT_SUCCESS;
        rdma_stat       ret = RDMA_SUCCESS;
@@ -2100,15 +2489,26 @@
 
        if (send_sig) {
                /* Set SEND_SIGNAL flag. */
                tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
                wdesc = rib_init_sendwait(msgid, cv_sig, qp);
+               *swid = (caddr_t)wdesc;
        } else {
                tx_wr.wr_flags = IBT_WR_NO_FLAGS;
                wdesc = rib_init_sendwait(msgid, 0, qp);
+               *swid = (caddr_t)wdesc;
        }
        wdesc->nsbufs = nds;
+#if defined(ASYNC_SERVER_DEREG)
+       wdesc->c      = c;
+       wdesc->c1     = c1;
+       wdesc->c2     = c2;
+       wdesc->l1     = l1;
+       wdesc->l2     = l2;
+       wdesc->wl     = l3;
+       wdesc->rl     = l4;
+#endif
        for (i = 0; i < nds; i++) {
                wdesc->sbufaddr[i] = sgl[i].ds_va;
        }
 
        tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
@@ -2161,21 +2561,72 @@
        }
 
        return (RDMA_SUCCESS);
 }
 
+#if defined(CLNT_INTERRUPT_COAL)
+/*
+ * Client interrupt coalescing: sends are normally posted unsignaled
+ * and their send_wids parked on qp->wd; every (preposted_rbufs / 2)
+ * sends, one signaled send flushes the parked descriptors via
+ * rib_scq_free().
+ */
 rdma_stat
+rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid)
+{
+	rdma_stat	ret;
+	struct send_wid	*sd, dlist;
+	rib_qp_t	*qp = ctoqp(conn);
+	caddr_t		wd;
+
+	mutex_enter(&conn->c_lock);
+	if ((conn->c_count + 1) >= (preposted_rbufs / 2)) {
+		/*
+		 * Flush point: issue one signaled send and complete
+		 * all previously parked (unsignaled) send_wids.
+		 */
+		conn->c_count = 0;
+		dlist.forw = dlist.back = &dlist;
+		while (qp->wd.forw != &qp->wd) {
+			sd = qp->wd.forw;
+			remque(sd);
+			insque(sd, &dlist);
+		}
+		mutex_exit(&conn->c_lock);
+		ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
+		while (dlist.forw != &dlist) {
+			sd = dlist.forw;
+			remque(sd);
+			rib_scq_free((caddr_t)sd);
+		}
+	} else {
+		mutex_exit(&conn->c_lock);
+		wd = 0;
+		ret = rib_send_and_wait(conn, cl, msgid, 0, 0, &wd);
+		mutex_enter(&conn->c_lock);
+		conn->c_count++;
+		insque(wd, &qp->wd);
+		mutex_exit(&conn->c_lock);
+	}
+	return (ret);
+}
+#endif
+
+rdma_stat
 rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
 {
        rdma_stat       ret;
+	caddr_t		wd;
+
+	/* send-wait & cv_signal */
+#if defined(ASYNC_SERVER_DEREG)
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+	    &wd);
+#else
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
+#endif
+       return (ret);
+}
 
+#if defined(ASYNC_SERVER_DEREG)
+rdma_stat
+rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid, caddr_t c,
+	caddr_t c1, int c2, caddr_t c3, int c4, int c5, int c6)
+{
+	rdma_stat	ret;
+	caddr_t		wid;
+
        /* send-wait & cv_signal */
-       ret = rib_send_and_wait(conn, cl, msgid, 1, 1);
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, c, c1, c2, c3, c4,
+	    c5, c6, &wid);
 
        return (ret);
 }
-
+#endif
+
 /*
  * Server interface (svc_rdma_ksend).
  * Send RPC reply and wait for RDMA_DONE.
  */
 rdma_stat
@@ -2182,17 +2633,22 @@
 rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
 {
        rdma_stat ret = RDMA_SUCCESS;
        struct rdma_done_list *rd;
        clock_t timout, cv_wait_ret;
+	caddr_t wid;
        rib_qp_t *qp = ctoqp(conn);
 
        mutex_enter(&qp->rdlist_lock);
        rd = rdma_done_add(qp, msgid);
 
        /* No cv_signal (whether send-wait or no-send-wait) */
-       ret = rib_send_and_wait(conn, cl, msgid, 1, 0);
+#if defined(ASYNC_SERVER_DEREG)
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+	    &wid);
+#else
+	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, &wid);
+#endif
        if (ret != RDMA_SUCCESS) {
 #ifdef DEBUG
            cmn_err(CE_WARN, "rib_send_resp: send_and_wait "
                "failed, msgid %u, qp %p", msgid, (void *)qp);
 #endif
@@ -2496,11 +2952,10 @@
  */
 rdma_stat
 rib_write(CONN *conn, struct clist *cl, int wait)
 {
        ibt_send_wr_t   tx_wr;
-       int             nds;
        int             cv_sig;
        ibt_wr_ds_t     sgl[DSEG_MAX];
        struct send_wid *wdesc;
        ibt_status_t    ibt_status;
        rdma_stat       ret = RDMA_SUCCESS;
@@ -2509,30 +2964,20 @@
        if (cl == NULL) {
                cmn_err(CE_WARN, "rib_write: NULL clist\n");
                return (RDMA_FAILED);
        }
 
+
+	/* Post one RDMA write per clist chunk, skipping empty chunks. */
+	while (cl != NULL) {
+		if (cl->c_len > 0) {
        bzero(&tx_wr, sizeof (ibt_send_wr_t));
-       /*
-        * Remote address is at the head chunk item in list.
-        */
        tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_daddr;
        tx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_dmemhandle.mrc_rmr; /* rkey */
+               sgl[0].ds_va = cl->c_saddr;
+               sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
+               sgl[0].ds_len = cl->c_len;
 
-       nds = 0;
-       while (cl != NULL) {
-               if (nds >= DSEG_MAX) {
-                       cmn_err(CE_WARN, "rib_write: DSEG_MAX too small!");
-                       return (RDMA_FAILED);
-               }
-               sgl[nds].ds_va = cl->c_saddr;
-               sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
-               sgl[nds].ds_len = cl->c_len;
-               cl = cl->c_next;
-               nds++;
-       }
-
        if (wait) {
                tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
                cv_sig = 1;
        } else {
                tx_wr.wr_flags = IBT_WR_NO_FLAGS;
@@ -2541,11 +2986,11 @@
 
        wdesc = rib_init_sendwait(0, cv_sig, qp);
        tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
        tx_wr.wr_opcode = IBT_WRC_RDMAW;
        tx_wr.wr_trans = IBT_RC_SRV;
-       tx_wr.wr_nds = nds;
+       tx_wr.wr_nds = 1;
        tx_wr.wr_sgl = sgl;
 
        mutex_enter(&conn->c_lock);
        if (conn->c_state & C_CONNECTED) {
                ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
@@ -2565,10 +3010,13 @@
                ret = rib_sendwait(qp, wdesc);
                if (ret != 0) {
                        return (ret);
                }
        }
+		}
+		cl = cl->c_next;
+	}
        return (RDMA_SUCCESS);
 }
 
 /*
  * RDMA Read a buffer from the remote address.
@@ -2683,11 +3131,11 @@
        rpcib_state_t   *ribstat;
        rib_hca_t       *hca;
        rdma_stat       status = RDMA_SUCCESS;
        int             i;
        struct clist    cl;
-       rdma_buf_t      rdbuf;
+       rdma_buf_t      rdbuf = {0};
        void            *buf = NULL;
        ibt_cm_req_rcv_t        cm_req_rcv;
        CONN            *conn;
        ibt_status_t ibt_status;
        ibt_ar_t        ar_query, ar_result;
@@ -2768,12 +3216,12 @@
                    }
                }
 #endif
 
                ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
-               ret_args->cm_ret.rep.cm_rdma_ra_out = 1;
-               ret_args->cm_ret.rep.cm_rdma_ra_in = 1;
+               ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
+               ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
                ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
 
                /*
                 * Pre-posts RECV buffers
                 */
@@ -3693,44 +4141,149 @@
 
        return (0);
 }
 
 rdma_stat
-rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
+rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
        struct mrc *buf_handle)
 {
        ibt_mr_hdl_t    mr_hdl = NULL;  /* memory region handle */
+#ifdef IB_FMR_SUP
+        ibt_pmr_desc_t  pmr_desc;       /* vaddr, lkey, rkey */
+        ibt_ma_hdl_t    ma_hdl = NULL;
+#endif
        ibt_mr_desc_t   mr_desc;        /* vaddr, lkey, rkey */
        rdma_stat       status;
        rib_hca_t       *hca = (ctoqp(conn))->hca;
 
        /*
         * Note: ALL buffer pools use the same memory type RDMARW.
         */
-       status = rib_reg_mem(hca, buf, buflen, 0, &mr_hdl, &mr_desc);
+#ifdef IB_FMR_SUP
+        status = rib_reg_mem_fmr(hca, adsp, buf, buflen, 0, &mr_hdl, &ma_hdl,
+            &pmr_desc);
        if (status == RDMA_SUCCESS) {
                buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+                buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+                buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+                buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+               goto ret_stat;
+        } else {
+                buf_handle->mrc_linfo = NULL;
+                buf_handle->mrc_lma = NULL;
+                buf_handle->mrc_lmr = 0;
+                buf_handle->mrc_rmr = 0;
+        }
+#endif
+       status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
+       if (status == RDMA_SUCCESS) {
+               buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
                buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
                buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
        } else {
                buf_handle->mrc_linfo = NULL;
                buf_handle->mrc_lmr = 0;
                buf_handle->mrc_rmr = 0;
        }
+ret_stat:
        return (status);
 }
 
+#ifdef IB_FMR_SUP
 static rdma_stat
-rib_reg_mem(rib_hca_t *hca, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
+rib_reg_mem_fmr(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size,
+	ibt_mr_flags_t spec, ibt_mr_hdl_t *mr_hdlp, ibt_ma_hdl_t *ma_hdlp,
+	ibt_pmr_desc_t *pmr_descp)
+{
+        ibt_va_attr_t   va_attr;
+        ibt_phys_buf_t  *paddr_list;
+        uint_t          paddr_list_len, num_paddr;
+        size_t          buf_sz = 0;
+        ibt_pmr_attr_t  pmr_attr;
+        ib_memlen_t     paddr_offset;
+        ibt_status_t    ibt_status;
+        uint_t          h_page_sz;
+
+	/* FMR registration of user address-space buffers isn't supported. */
+	if (adsp != NULL)
+		return (RDMA_FAILED);
+
+	bzero(&va_attr, sizeof (ibt_va_attr_t));
+	va_attr.va_vaddr = (ib_vaddr_t)(uintptr_t)buf;
+	va_attr.va_len = size;
+	va_attr.va_as = NULL;	/* adsp is known to be NULL here */
+	va_attr.va_flags = IBT_VA_FMR | IBT_VA_SLEEP;
+	if (spec == IBT_MR_NONCOHERENT)
+		va_attr.va_flags |= IBT_VA_NONCOHERENT;
+	va_attr.va_phys_buf_min = va_attr.va_phys_buf_max = 0;
+
+	h_page_sz = hca->hca_attrs.hca_page_sz * 1024;	/* attr is in KB */
+	paddr_list_len = (size / h_page_sz) + 2;
+	paddr_list = kmem_zalloc(sizeof (ibt_phys_buf_t) * paddr_list_len,
+	    KM_NOSLEEP);
+	if (paddr_list == NULL)
+		return (RDMA_FAILED);
+
+        if (rib_debug > 0) {
+                cmn_err(CE_NOTE, "fmr: vaddr %p, size %d paddr_list_len %d \n",
+                    buf, size, paddr_list_len);
+        }
+
+        ibt_status = ibt_map_mem_area(hca->hca_hdl, &va_attr, paddr_list_len,
+            paddr_list, &num_paddr, &buf_sz, &paddr_offset, ma_hdlp);
+        if (ibt_status != IBT_SUCCESS) {
+                cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_map_mem_area failed: "
+                    "status %d", ibt_status);
+                kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
+                return (RDMA_FAILED);
+        }
+
+	if (rib_debug > 0) {
+		cmn_err(CE_NOTE, "fmr: p_laddr %p, p_size %d, buf_sz %d, "
+		    "p_offset %llX\n", paddr_list[0].p_laddr,
+		    paddr_list[0].p_size, buf_sz, paddr_offset);
+		cmn_err(CE_NOTE, "fmr: ibt_map_mem_area: ret %d, "
+		    "num_paddr %d, spec %d\n", ibt_status, num_paddr, spec);
+	}
+
+	bzero(&pmr_attr, sizeof (ibt_pmr_attr_t));
+	pmr_attr.pmr_iova = (ib_vaddr_t)(uintptr_t)buf;
+	pmr_attr.pmr_len = size;
+	pmr_attr.pmr_num_buf = num_paddr;
+	pmr_attr.pmr_buf_sz = buf_sz;
+	pmr_attr.pmr_buf_list = paddr_list;
+	pmr_attr.pmr_offset = paddr_offset;
+	pmr_attr.pmr_flags = spec;
+	pmr_attr.pmr_ma = *ma_hdlp;
+
+        ibt_status = ibt_register_physical_fmr(hca->hca_hdl, hca->fmr_pool,
+            &pmr_attr, mr_hdlp, pmr_descp);
+        if (ibt_status != IBT_SUCCESS) {
+                cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_register_physical_fmr "
+                    "failed: status %d", ibt_status);
+                (void) ibt_unmap_mem_area(hca->hca_hdl, *ma_hdlp);
+                *ma_hdlp = NULL;
+                kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
+                return (RDMA_FAILED);
+        }
+
+        if (rib_debug > 0) {
+                cmn_err(CE_NOTE,"fmr: rkey: 0x%lX  lkey: 0x%lX, iova: %p, fmr_hdl %p \n",
+                    pmr_descp->pmd_rkey, pmr_descp->pmd_lkey,
+                    pmr_descp->pmd_iova, *mr_hdlp);
+        }
+
+        kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len);
+
+	return (RDMA_SUCCESS);
+}
+
+#endif
+
+static rdma_stat
+rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
        ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
 {
        ibt_mr_attr_t   mem_attr;
        ibt_status_t    ibt_status;
-
        mem_attr.mr_vaddr = (uintptr_t)buf;
        mem_attr.mr_len = (ib_msglen_t)size;
-       mem_attr.mr_as = NULL;
+	mem_attr.mr_as = (struct as *)adsp;
        mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
            IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
            IBT_MR_ENABLE_WINDOW_BIND | spec;
 
        rw_enter(&hca->state_lock, RW_READER);
@@ -3751,58 +4304,160 @@
        }
        return (RDMA_SUCCESS);
 }
 
 rdma_stat
-rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
+rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen,
+#ifdef SERVER_REG_CACHE
+       struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc)
+#else
        struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle)
+#endif
 {
        ibt_mr_hdl_t    mr_hdl = NULL;  /* memory region handle */
+#ifdef IB_FMR_SUP
+        ibt_pmr_desc_t  pmr_desc;       /* vaddr, lkey, rkey */
+        ibt_ma_hdl_t    ma_hdl = NULL;
+#endif
+#ifdef SERVER_REG_CACHE
+       rib_lrc_entry_t *l;
+#endif
        ibt_mr_desc_t   mr_desc;        /* vaddr, lkey, rkey */
        rdma_stat       status;
        rib_hca_t       *hca = (ctoqp(conn))->hca;
 
        /*
         * Non-coherent memory registration.
         */
-       status = rib_reg_mem(hca, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
+#ifdef SERVER_REG_CACHE
+	/*
+	 * If the long-reply-cache entry is already registered, reuse
+	 * its memory handles; otherwise register the whole cached
+	 * buffer so the registration can be reused later.
+	 */
+	l = (rib_lrc_entry_t *)lrc;
+	if (l != NULL) {
+		if (l->registered) {
+			buf_handle->mrc_linfo =
+			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
+			buf_handle->mrc_lmr = (uint32_t)l->lrc_mhandle.mrc_lmr;
+			buf_handle->mrc_rmr = (uint32_t)l->lrc_mhandle.mrc_rmr;
+#ifdef IB_FMR_SUP
+			buf_handle->mrc_lma =
+			    (uintptr_t)l->lrc_mhandle.mrc_lma;
+#endif
+			*sync_handle = (RIB_SYNCMEM_HANDLE)
+			    l->lrc_mhandle.mrc_linfo;
+			return (RDMA_SUCCESS);
+		} else {
+			/* Always register the whole buffer */
+			buf = (caddr_t)l->lrc_buf;
+			buflen = l->lrc_len;
+		}
+	}
+#endif
+#ifdef IB_FMR_SUP
+	status = rib_reg_mem_fmr(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT,
+	    &mr_hdl, &ma_hdl, &pmr_desc);
+	if (status == RDMA_SUCCESS) {
+		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+		buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+		buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+		buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
+#ifdef SERVER_REG_CACHE
+		if (l) {
+			/* cache the FMR keys, not the unset mr_desc ones */
+			l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
+			l->lrc_mhandle.mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+			l->lrc_mhandle.mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+			l->registered = TRUE;
+			l->lrc_mhandle.mrc_lma = (uintptr_t)ma_hdl;
+		}
+#endif
+		goto ret_stat;
+	} else {
+		if (rib_debug > 1)
+			cmn_err(CE_WARN, "fmr reg failed for buffer %p of "
+			    "length %d\n", (void *)buf, buflen);
+		buf_handle->mrc_linfo = NULL;
+		buf_handle->mrc_lma = NULL;
+		buf_handle->mrc_lmr = 0;
+		buf_handle->mrc_rmr = 0;
+	}
+#endif
+       status = rib_reg_mem(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
                        &mr_desc);
        if (status == RDMA_SUCCESS) {
+#ifdef SERVER_REG_CACHE
+		if (l) {
+			l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
+			l->lrc_mhandle.mrc_lmr = (uint32_t)mr_desc.md_lkey;
+			l->lrc_mhandle.mrc_rmr = (uint32_t)mr_desc.md_rkey;
+			l->registered = TRUE;
+		}
+#endif
                buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
                buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
                buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
                *sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
        } else {
                buf_handle->mrc_linfo = NULL;
                buf_handle->mrc_lmr = 0;
                buf_handle->mrc_rmr = 0;
        }
+ret_stat:
        return (status);
 }
 
 /* ARGSUSED */
 rdma_stat
 rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
 {
+#ifdef IB_FMR_SUP
+	ibt_status_t	ibt_status;
+#endif
        rib_hca_t *hca = (ctoqp(conn))->hca;
-
        /*
         * Allow memory deregistration even if HCA is
         * getting detached. Need all outstanding
         * memory registrations to be deregistered
         * before HCA_DETACH_EVENT can be accepted.
         */
+#ifdef IB_FMR_SUP
+	if (buf_handle.mrc_lma) {
+		ibt_status = ibt_unmap_mem_area(hca->hca_hdl,
+		    (ibt_ma_hdl_t)buf_handle.mrc_lma);
+		if (ibt_status != IBT_SUCCESS) {
+			cmn_err(CE_WARN, "rib_deregistermem: "
+			    "ibt_unmap_mem_area: %d failed", ibt_status);
+			return (RDMA_FAILED);
+		}
+
+		ibt_status = ibt_deregister_fmr(hca->hca_hdl,
+		    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+		if (ibt_status != IBT_SUCCESS)
+			return (RDMA_FAILED);
+		return (RDMA_SUCCESS);
+	}
+#endif
        (void) ibt_deregister_mr(hca->hca_hdl,
                        (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
        return (RDMA_SUCCESS);
 }
 
 /* ARGSUSED */
 rdma_stat
 rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
+#ifdef SERVER_REG_CACHE
+               RIB_SYNCMEM_HANDLE sync_handle, void *lrc)
+#else
                RIB_SYNCMEM_HANDLE sync_handle)
+#endif
 {
+#ifdef SERVER_REG_CACHE
+	rib_lrc_entry_t *l = (rib_lrc_entry_t *)lrc;
+
+	/* Cached buffers stay registered; skip the deregistration. */
+	if (l != NULL && l->registered)
+		return (RDMA_SUCCESS);
+#endif
+
        (void) rib_deregistermem(conn, buf, buf_handle);
 
        return (RDMA_SUCCESS);
 }
 
@@ -3877,19 +4532,18 @@
                        num * sizeof (void *), KM_SLEEP);
 
        mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
        bp->numelems = num;
 
        switch (ptype) {
            case SEND_BUFFER:
                mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
-               /* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
                bp->rsize = RPC_MSG_SZ;
                break;
            case RECV_BUFFER:
                mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
-               /* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
                bp->rsize = RPC_BUF_SIZE;
                break;
            default:
                goto fail;
        }
@@ -3901,14 +4555,14 @@
        bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
        rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
                        sizeof (ibt_mr_hdl_t), KM_SLEEP);
        rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
                        sizeof (ibt_mr_desc_t), KM_SLEEP);
        rw_enter(&hca->state_lock, RW_READER);
        if (hca->state != HCA_INITED) {
                rw_exit(&hca->state_lock);
+		cmn_err(CE_WARN, "rib_rbufpool_create: "
+		    "hca->state != HCA_INITED");
                goto fail;
        }
        for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
                bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
                mem_attr.mr_vaddr = (uintptr_t)buf;
@@ -3924,11 +4578,10 @@
                    rw_exit(&hca->state_lock);
                    goto fail;
                }
        }
        rw_exit(&hca->state_lock);
        buf = (caddr_t)bp->buf;
        for (i = 0; i < num; i++, buf += bp->rsize) {
                bp->buflist[i] = (void *)buf;
        }
        bp->buffree = num - 1;       /* no. of free buffers */
@@ -4015,11 +4668,10 @@
        if (rbp->mr_hdl)
                kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
 
        if (rbp->mr_desc)
                kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
        if (bp->buf)
                kmem_free(bp->buf, bp->bufsize);
        mutex_destroy(&bp->buflock);
        kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
        kmem_free(rbp, sizeof (rib_bufpool_t));
@@ -4057,10 +4709,18 @@
                return (RDMA_SUCCESS);
        } else
                return (RDMA_FAILED);
 }
 
+#if defined(MEASURE_POOL_DEPTH)
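+/*
+ * No-op hooks, presumably probe points (e.g., for DTrace fbt) to
+ * observe send/receive buffer-pool depth.
+ */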
+static void rib_recv_bufs(uint32_t x) { return; }
+static void rib_send_bufs(uint32_t x) { return; }
+#endif
 
 /*
  * Fetch a buffer of specified type.
  * Note that rdbuf->handle is mw's rkey.
  */
@@ -4107,10 +4767,16 @@
        for (i = bp->numelems - 1; i >= 0; i--) {
            if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
                rdbuf->handle.mrc_rmr = (uint32_t)rbp->mr_desc[i].md_rkey;
                rdbuf->handle.mrc_linfo = (uintptr_t)rbp->mr_hdl[i];
                rdbuf->handle.mrc_lmr = (uint32_t)rbp->mr_desc[i].md_lkey;
+#if defined(MEASURE_POOL_DEPTH)
+		if (ptype == SEND_BUFFER)
+			rib_send_bufs(MAX_BUFS - (bp->buffree + 1));
+		if (ptype == RECV_BUFFER)
+			rib_recv_bufs(MAX_BUFS - (bp->buffree + 1));
+#endif
                bp->buffree--;
                if (rib_debug > 1)
                    cmn_err(CE_NOTE, "rib_rbuf_alloc: %d free bufs "
                        "(type %d)\n", bp->buffree+1, ptype);
 
@@ -4958,10 +5624,13 @@
                 * conn_lists are NULL, so destroy
                 * buffers, close hca and be done.
                 */
                rib_rbufpool_destroy(hca, RECV_BUFFER);
                rib_rbufpool_destroy(hca, SEND_BUFFER);
+#ifdef SERVER_REG_CACHE
+               rib_destroy_cache(hca);
+#endif
                (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
                (void) ibt_close_hca(hca->hca_hdl);
                hca->hca_hdl = NULL;
        }
        rw_exit(&hca->cl_conn_list.conn_lock);
@@ -4981,5 +5650,332 @@
                (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
                (void) ibt_close_hca(hca->hca_hdl);
                hca->hca_hdl = NULL;
        }
 }
+
+#ifdef SERVER_REG_CACHE
+
+/*
+ * kmem reclaim callback: drain the whole AVL cache, deregistering
+ * and freeing every buffer queued on every node.
+ */
+static void
+rib_server_side_cache_reclaim(void *argp)
+{
+	cache_avl_struct_t	*rcas;
+	rib_lrc_entry_t		*rb;
+	rib_hca_t		*hca = (rib_hca_t *)argp;
+
+	rw_enter(&hca->avl_rw_lock, RW_WRITER);
+	rcas = avl_first(&hca->avl_tree);
+	if (rcas != NULL)
+		avl_remove(&hca->avl_tree, rcas);
+	while (rcas != NULL) {
+		while (rcas->r.forw != &rcas->r) {
+			rcas->elements--;
+			rb = rcas->r.forw;
+			remque(rb);
+			(void) rib_deregistermem_via_hca(hca, rb->lrc_buf,
+			    rb->lrc_mhandle);
+			kmem_free(rb->lrc_buf, rb->lrc_len);
+			kmem_free(rb, sizeof (rib_lrc_entry_t));
+		}
+		mutex_destroy(&rcas->node_lock);
+		kmem_cache_free(hca->server_side_cache, rcas);
+		rcas = avl_first(&hca->avl_tree);
+		if (rcas != NULL)
+			avl_remove(&hca->avl_tree, rcas);
+	}
+	rw_exit(&hca->avl_rw_lock);
+}
+
+static int
+avl_compare(const void *t1, const void *t2)
+{
+	uint32_t len1 = ((cache_avl_struct_t *)t1)->len;
+	uint32_t len2 = ((cache_avl_struct_t *)t2)->len;
+
+	if (rib_debug > 1)
+		cmn_err(CE_NOTE, "Comparing %d and %d\n", len1, len2);
+	if (len1 == len2)
+		return (0);
+	return (len1 < len2 ? -1 : 1);
+}
+
+static void
+rib_destroy_cache(rib_hca_t *hca)
+{
+	hca->avl_init = FALSE;
+	kmem_cache_destroy(hca->server_side_cache);
+	avl_destroy(&hca->avl_tree);
+	rw_destroy(&hca->avl_rw_lock);
+}
+
+static rib_lrc_entry_t *
+rib_get_server_cache_buf(CONN *conn, uint32_t len)
+{
+	cache_avl_struct_t	cas, *rcas;
+	rib_hca_t		*hca = (ctoqp(conn))->hca;
+	rib_lrc_entry_t		*reply_buf;
+	avl_index_t		where = NULL;
+
+	if (!hca->avl_init)
+		goto error_alloc;
+
+	cas.len = len;
+	rw_enter(&hca->avl_rw_lock, RW_READER);
+	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
+	    &cas, &where)) == NULL) {
+		rw_exit(&hca->avl_rw_lock);
+		rw_enter(&hca->avl_rw_lock, RW_WRITER);
+		/* Recheck to make sure no other thread added the entry in */
+		if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
+		    &cas, &where)) == NULL) {
+			/* Allocate an avl tree entry */
+			if (rib_debug > 1)
+				cmn_err(CE_NOTE, "Allocating an avl entry "
+				    "for length %d\n", len);
+			rcas = kmem_cache_alloc(hca->server_side_cache,
+			    KM_SLEEP);
+			bzero(rcas, sizeof (cache_avl_struct_t));
+			rcas->elements = 0;
+			rcas->r.forw = &rcas->r;
+			rcas->r.back = &rcas->r;
+			rcas->len = len;
+			mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT,
+			    NULL);
+			avl_insert(&hca->avl_tree, rcas, where);
+		}
+	}
+
+	/*
+	 * Check elements under node_lock: two readers could otherwise
+	 * both see one free buffer and dequeue it twice.
+	 */
+	mutex_enter(&rcas->node_lock);
+	if (rcas->elements > 0) {
+		reply_buf = rcas->r.forw;
+		remque(reply_buf);
+		rcas->elements--;
+		mutex_exit(&rcas->node_lock);
+		rw_exit(&hca->avl_rw_lock);
+		if (rib_debug > 1)
+			cmn_err(CE_NOTE, "Allocating a pre-alloced buffer "
+			    "for length %d\n", len);
+	} else {
+		mutex_exit(&rcas->node_lock);
+		rw_exit(&hca->avl_rw_lock);
+		rib_total_buffers++;
+		if (rib_debug > 1)
+			cmn_err(CE_NOTE, "Allocating a new buffer "
+			    "for length %d\n", len);
+		/* Allocate a new reply_buf entry */
+		reply_buf = kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
+		reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
+		reply_buf->lrc_len = len;
+		reply_buf->registered = FALSE;
+		reply_buf->avl_node = (void *)rcas;
+	}
+
+	return (reply_buf);
+
+error_alloc:
+	reply_buf = kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
+	reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
+	reply_buf->lrc_len = len;
+	reply_buf->registered = FALSE;
+	reply_buf->avl_node = NULL;
+	return (reply_buf);
+}
+
+/*
+ * Return a pre-registered buffer to the cache (without
+ * deregistering it).
+ */
+
+static void
+rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
+{
+	cache_avl_struct_t	cas, *rcas;
+	avl_index_t		where = NULL;
+	rib_hca_t		*hca = (ctoqp(conn))->hca;
+
+	if (!reg_buf) {
+		cmn_err(CE_WARN, "rib_free_server_cache_buf: NULL reg_buf\n");
+		return;
+	}
+	if (!hca->avl_init)
+		goto error_free;
+
+	cas.len = reg_buf->lrc_len;
+	rw_enter(&hca->avl_rw_lock, RW_READER);
+	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
+	    &cas, &where)) == NULL) {
+		rw_exit(&hca->avl_rw_lock);
+		goto error_free;
+	} else {
+		mutex_enter(&rcas->node_lock);
+		insque(reg_buf, &rcas->r);
+		rcas->elements++;
+		mutex_exit(&rcas->node_lock);
+		rw_exit(&hca->avl_rw_lock);
+		if (rib_debug > 1)
+			cmn_err(CE_NOTE, "Returning buffer for length %d\n",
+			    reg_buf->lrc_len);
+	}
+	return;
+
+error_free:
+	(void) rib_deregistermem_via_hca(hca, reg_buf->lrc_buf,
+	    reg_buf->lrc_mhandle);
+	kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
+	kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
+}
+
+#endif
+
+static rdma_stat
+rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
+	uint_t buflen, struct mrc *buf_handle)
+{
+	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
+#ifdef IB_FMR_SUP
+	ibt_pmr_desc_t	pmr_desc;	/* vaddr, lkey, rkey */
+	ibt_ma_hdl_t	ma_hdl = NULL;
+#endif
+	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
+	rdma_stat	status;
+
+	/*
+	 * Note: ALL buffer pools use the same memory type RDMARW.
+	 *
+	 * This code will not be activated on the server, so the
+	 * rib_reg_mem_fmr() call could be removed; it is left in, in
+	 * case the FMR bugs get fixed.  The bigger question, still to
+	 * be evaluated, is whether FMR is needed at all when the
+	 * registered buffers come out of a slab cache.
+	 */
+#ifdef IB_FMR_SUP
+	/* note: adsp and buf are passed in prototype order */
+	status = rib_reg_mem_fmr(hca, adsp, buf, buflen, 0, &mr_hdl,
+	    &ma_hdl, &pmr_desc);
+	if (status == RDMA_SUCCESS) {
+		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+		buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey;
+		buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey;
+		buf_handle->mrc_lma = (uintptr_t)ma_hdl;
+		goto ret_stat;
+	} else {
+		buf_handle->mrc_linfo = NULL;
+		buf_handle->mrc_lma = NULL;
+		buf_handle->mrc_lmr = 0;
+		buf_handle->mrc_rmr = 0;
+	}
+#endif
+	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
+	if (status == RDMA_SUCCESS) {
+		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
+		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
+		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
+	} else {
+		buf_handle->mrc_linfo = NULL;
+		buf_handle->mrc_lmr = 0;
+		buf_handle->mrc_rmr = 0;
+	}
+ret_stat:
+	return (status);
+}
+
+/* ARGSUSED */
+static rdma_stat
+rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf,
+	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle)
+{
+	(void) rib_deregistermem_via_hca(hca, buf, buf_handle);
+
+	return (RDMA_SUCCESS);
+}
+
+/* ARGSUSED */
+static rdma_stat
+rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
+{
+#ifdef IB_FMR_SUP
+	ibt_status_t	ibt_status;
+
+	if (buf_handle.mrc_lma) {
+		ibt_status = ibt_unmap_mem_area(hca->hca_hdl,
+		    (ibt_ma_hdl_t)buf_handle.mrc_lma);
+		if (ibt_status != IBT_SUCCESS) {
+			cmn_err(CE_WARN, "rib_deregistermem_via_hca: "
+			    "ibt_unmap_mem_area: %d failed", ibt_status);
+			return (RDMA_FAILED);
+		}
+		ibt_status = ibt_deregister_fmr(hca->hca_hdl,
+		    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+		if (ibt_status != IBT_SUCCESS) {
+			cmn_err(CE_WARN, "rib_deregistermem_via_hca: "
+			    "ibt_deregister_fmr: %d failed", ibt_status);
+			return (RDMA_FAILED);
+		}
+		return (RDMA_SUCCESS);
+	}
+#endif
+
+	(void) ibt_deregister_mr(hca->hca_hdl,
+	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
+	return (RDMA_SUCCESS);
+}
+
+#if defined(ASYNC_SERVER_DEREG)||defined(ASYNC_CLIENT_DEREG)
+static int
+clist_deregister1(CONN *conn, struct clist *cl, bool_t src)
+{
+        struct clist *c;
+
+        for (c = cl; c; c = c->c_next) {
+                if (src) {
+                        if (c->c_smemhandle.mrc_rmr != 0) {
+                                (void) RDMA_DEREGMEMSYNC(conn,
+                                    (caddr_t)(uintptr_t)c->c_saddr,
+                                    c->c_smemhandle,
+#ifdef SERVER_REG_CACHE
+                                    (void *)(uintptr_t)c->c_ssynchandle, (void *)c->long_reply_buf);
+#else
+                                    (void *)(uintptr_t)c->c_ssynchandle);
+#endif
+                                c->c_smemhandle.mrc_rmr = 0;
+                                c->c_ssynchandle = NULL;
+                        }
+                } else {
+                        if (c->c_dmemhandle.mrc_rmr != 0) {
+                                (void) RDMA_DEREGMEMSYNC(conn,
+                                    (caddr_t)(uintptr_t)c->c_daddr,
+                                    c->c_dmemhandle,
+#ifdef SERVER_REG_CACHE
+                                    (void *)(uintptr_t)c->c_dsynchandle, (void *)c->long_reply_buf);
+#else
+                                    (void *)(uintptr_t)c->c_dsynchandle);
+#endif
+                                c->c_dmemhandle.mrc_rmr = 0;
+                                c->c_dsynchandle = NULL;
+                        }
+                }
+        }
+
+        return (RDMA_SUCCESS);
+}
+#endif
+
+#if defined(ASYNC_CLIENT_DEREG)
+/*
+ * Worker thread for the deferred dereg queue declared above.
+ */
+static void
+async_dereg_thread(caddr_t arg)
+{
+	ASYNC	*r;
+
+	cmn_err(CE_NOTE, "async_dereg_thread initiated\n");
+	for (;;) {
+		mutex_enter(&at_mutex);
+		while (rqueue.forw == &rqueue)
+			cv_wait(&at_cond, &at_mutex);
+		r = rqueue.forw;
+		remque(r);
+		mutex_exit(&at_mutex);
+		/* Process deregistration */
+		(void) clist_deregister1(&r->c_conn, &r->c_clist, FALSE);
+		kmem_free(r, sizeof (ASYNC));
+	}
+	/* NOTREACHED */
+}
+
+static void
+insert_queue(CONN *conn, struct clist *rwc)
+{
+	ASYNC	*r;
+
+	r = kmem_zalloc(sizeof (ASYNC), KM_SLEEP);
+	r->c_clist = *rwc;
+	r->c_conn = *conn;
+	mutex_enter(&at_mutex);
+	insque(r, &rqueue);
+	cv_broadcast(&at_cond);
+	mutex_exit(&at_mutex);
+}
+#endif